statsample 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.4.1 / 2009-09-12
2
+ * More methods and usage documentation
3
+ * Logit tests
4
+ * Bug fix: rescue for requires doesn't specify LoadError
5
+ * Binomial::BaseEngine new methods: coeffs_se, coeffs, constant and constant_se
1
6
  === 0.4.0 / 2009-09-10
2
7
  * New Distribution module, based on statistics2.rb by Shin-ichiro HARA. Replaces all instances of GSL distributions pdf and cdf calculations for native calculation.
3
8
  * New Maximum Likelihood Estimation for Logit, Probit and Normal Distribution using Von Tessin(2005) algorithm. See MLE class and subclasses for more information.
data/lib/distribution.rb CHANGED
@@ -1,4 +1,12 @@
1
1
  require 'statistics2'
2
+ # Several distribution modules to calculate cdf, inverse cdf and pdf
3
+ # See Distribution::Pdf for interface.
4
+ #
5
+ # Usage:
6
+ # Distribution::Normal.cdf(1.96)
7
+ # => 0.97500210485178
8
+ # Distribution::Normal.p_value(0.95)
9
+ # => 1.64485364660836
2
10
  module Distribution
3
11
  autoload(:ChiSquare, 'distribution/chisquare')
4
12
  autoload(:T, 'distribution/t')
@@ -1,8 +1,7 @@
1
1
  module Distribution
2
2
  # Calculate cdf and inverse cdf for Chi Square Distribution.
3
3
  #
4
- # Based on Babatunde, Iyiola & Eni () :
5
- # "A Numerical Procedure for Computing Chi-Square Percentage Points"
4
+ # Based on Statistics2 module
6
5
  #
7
6
  module ChiSquare
8
7
  class << self
data/lib/statsample.rb CHANGED
@@ -48,13 +48,13 @@ end
48
48
  begin
49
49
  require 'gettext'
50
50
  rescue LoadError
51
- def bindtextdomain(d)
51
+ def bindtextdomain(d) #:nodoc:
52
52
  d
53
53
  end
54
54
 
55
55
  # Bored module
56
- module GetText
57
- def _(t)
56
+ module GetText #:nodoc:
57
+ def _(t)
58
58
  t
59
59
  end
60
60
  end
@@ -81,7 +81,7 @@ end
81
81
  #
82
82
  module Statsample
83
83
 
84
- VERSION = '0.4.0'
84
+ VERSION = '0.4.1'
85
85
  SPLIT_TOKEN = ","
86
86
  autoload(:Database, 'statsample/converters')
87
87
  autoload(:Anova, 'statsample/anova')
@@ -90,9 +90,9 @@ module Statsample
90
90
  autoload(:PlainText, 'statsample/converters')
91
91
  autoload(:Excel, 'statsample/converters')
92
92
  autoload(:GGobi, 'statsample/converters')
93
- autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
93
+ autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
94
94
  autoload(:HtmlReport, 'statsample/htmlreport')
95
- autoload(:Mx, 'statsample/converters')
95
+ autoload(:Mx, 'statsample/converters')
96
96
  autoload(:Resample, 'statsample/resample')
97
97
  autoload(:SRS, 'statsample/srs')
98
98
  autoload(:Codification, 'statsample/codification')
@@ -193,7 +193,7 @@ module Statsample
193
193
  end
194
194
 
195
195
  g=general_averages
196
- t.add_horizontal_line
196
+ t.add_horizontal_line
197
197
 
198
198
  row=[_("Overall averages"),"",""]+@fields.collect{|f|
199
199
  sprintf("%0.3f",g[f])
@@ -69,7 +69,7 @@ class DominanceAnalysis
69
69
  out.extend report_type
70
70
  out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
71
71
  out.add _("Sample size: %d\n") % @n_samples
72
- t=Distribution::T.p_value(1-((1-alfa) / 2),@n_samples - 1)
72
+ t=Distribution::T.p_value(1-((1-alfa) / 2), @n_samples - 1)
73
73
  out.add "t:#{t}\n"
74
74
  out.add "Linear Regression Engine: #{@lr_class.name}"
75
75
  out.nl
@@ -89,8 +89,8 @@ end
89
89
 
90
90
  # replaces all key and fill classes with similar ones, without opacity
91
91
  # this allows rendering of svg and png on rox and gqview without problems
92
- module SVG
93
- module Graph
92
+ module SVG #:nodoc:
93
+ module Graph
94
94
  class BarNoOp < Bar
95
95
  def get_css; SVG::Graph.get_css_standard; end
96
96
  end
@@ -56,8 +56,8 @@ module Statsample
56
56
  end
57
57
  # Creates a zero matrix Mx1, with M=x.M
58
58
  def set_default_parameters(x)
59
- fd=x.column_size.times.collect{|i| 0.0}
60
- fd.push(0.1) if self.is_a? Statsample::MLE::Normal
59
+ fd=[0.0]*x.column_size
60
+ fd.push(0.1) if self.is_a? Statsample::MLE::Normal
61
61
  parameters = Matrix.columns([fd])
62
62
  end
63
63
 
@@ -2,13 +2,39 @@
2
2
  module Statsample
3
3
  module Regression
4
4
  module Binomial
5
+ # Create a Logit model object.
6
+ # ds:: Dataset
7
+ # y_var:: Name of dependent vector
8
+ # Use
9
+ # dataset=Statsample::CSV.read("data.csv")
10
+ # y="y"
11
+ # lr=Statsample::Regression::Binomial.logit(dataset,y)
12
+ #
5
13
  def self.logit(ds,y_var)
6
14
  Logit.new(ds,y_var)
7
15
  end
16
+ # Create a Probit model object.
17
+ # ds:: Dataset
18
+ # y_var:: Name of dependent vector
19
+ # Use
20
+ # dataset=Statsample::CSV.read("data.csv")
21
+ # y="y"
22
+ # lr=Statsample::Regression::Binomial.probit(dataset,y)
23
+ #
24
+
8
25
  def self.probit(ds,y_var)
9
26
  Probit.new(ds,y_var)
10
27
  end
11
-
28
+ # Base Engine for binomial regression analysis.
29
+ # See Statsample::Regression::Binomial.logit() and
30
+ # Statsample::Regression::Binomial.probit for fast
31
+ # access methods.
32
+ #
33
+ # Use:
34
+ # dataset=Statsample::CSV.read("data.csv")
35
+ # y="y"
36
+ # model=Statsample::MLE::Logit.new
37
+ # lr=Statsample::Regression::Binomial::BaseEngine.new(dataset, y, model)
12
38
  class BaseEngine
13
39
  attr_reader :log_likehood, :iterations
14
40
  def initialize(ds,y_var,model)
@@ -6,10 +6,10 @@ end
6
6
 
7
7
  module Statsample
8
8
  class << self
9
- # Create a matrix using vectors as columns
9
+ # Create a matrix using vectors as columns.
10
10
  # Use:
11
11
  #
12
- # matrix=Statsample.vector_cols_matrix(v1,v2)
12
+ # matrix=Statsample.vector_cols_matrix(v1,v2)
13
13
  def vector_cols_matrix(*vs)
14
14
  # test
15
15
  size=vs[0].size
@@ -23,7 +23,7 @@ module Statsample
23
23
  end
24
24
  end
25
25
  # Returns a duplicate of the input vectors, without missing data
26
- # for any of the vectors
26
+ # for any of the vectors.
27
27
  #
28
28
  # a=[1,2,3,6,7,nil,3,5].to_vector(:scale)
29
29
  # b=[nil,nil,5,6,4,5,10,2].to_vector(:scale)
@@ -89,8 +89,8 @@ class Vector
89
89
  def dup
90
90
  Vector.new(@data.dup,@type,@missing_values.dup,@labels.dup)
91
91
  end
92
- # Returns an empty duplicate of the vector. Maintains the type, missing
93
- # values, labels
92
+ # Returns an empty duplicate of the vector. Maintains the type,
93
+ # missing values and labels.
94
94
  def dup_empty
95
95
  Vector.new([],@type,@missing_values.dup,@labels.dup)
96
96
  end
@@ -123,7 +123,7 @@ class Vector
123
123
 
124
124
  alias_method :standarized, :vector_standarized
125
125
 
126
- def box_cox_transformation(lambda)
126
+ def box_cox_transformation(lambda) # :nodoc:
127
127
  raise "Should be a scale" unless @type==:scale
128
128
  @data_with_nils.collect{|x|
129
129
  if !x.nil?
@@ -162,21 +162,20 @@ class Vector
162
162
  end
163
163
  # Modifies current vector, with data modified by block.
164
164
  # Equivalent to #collect! on @data
165
-
166
165
  def recode!
167
166
  @data.collect!{|x|
168
167
  yield x
169
168
  }
170
169
  set_valid_data
171
170
  end
172
- # Iterate on each item
171
+ # Iterate on each item.
173
172
  # Equivalent to
174
173
  # @data.each{|x| yield x}
175
174
  def each
176
175
  @data.each{|x| yield(x) }
177
176
  end
178
177
 
179
- # Iterate on each item_index
178
+ # Iterate on each item, retrieving index
180
179
 
181
180
  def each_index
182
181
  (0...@data.size).each {|i|
@@ -185,16 +184,27 @@ class Vector
185
184
  end
186
185
  # Add a value at the end of the vector.
187
186
  # If second argument set to false, you should update the Vector using
188
- # Vector#set_valid_data at the end of your insertion cycle
187
+ # Vector.set_valid_data at the end of your insertion cycle
189
188
  #
190
189
  def add(v,update_valid=true)
191
190
  @data.push(v)
192
191
  set_valid_data if update_valid
193
192
  end
194
193
  # Update valid_data, missing_data, data_with_nils and gsl
195
- # at the end of an insertion
194
+ # at the end of an insertion.
196
195
  #
197
- # Use after add(v,false)
196
+ # Use after Vector.add(v,false)
197
+ # Usage:
198
+ # v=Statsample::Vector.new
199
+ # v.add(2,false)
200
+ # v.add(4,false)
201
+ # v.data
202
+ # => [2,4]
203
+ # v.valid_data
204
+ # => []
205
+ # v.set_valid_data
206
+ # v.valid_data
207
+ # => [2,4]
198
208
  def set_valid_data
199
209
  @valid_data.clear
200
210
  @missing_data.clear
@@ -203,17 +213,17 @@ class Vector
203
213
  set_valid_data_intern
204
214
  set_scale_data if(@type==:scale)
205
215
  end
206
-
216
+
207
217
  if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
208
- def set_valid_data_intern
218
+ def set_valid_data_intern #:nodoc:
209
219
  Statsample::STATSAMPLE__.set_valid_data_intern(self)
210
220
  end
211
221
  else
212
- def set_valid_data_intern
222
+ def set_valid_data_intern #:nodoc:
213
223
  _set_valid_data_intern
214
224
  end
215
225
  end
216
- def _set_valid_data_intern
226
+ def _set_valid_data_intern #:nodoc:
217
227
  @data.each do |n|
218
228
  if is_valid? n
219
229
  @valid_data.push(n)
@@ -225,15 +235,17 @@ class Vector
225
235
  end
226
236
  @has_missing_data=@missing_data.size>0
227
237
  end
228
-
238
+
229
239
  # Retrieves true if data has one or more missing values
230
240
  def has_missing_data?
231
241
  @has_missing_data
232
242
  end
243
+ # Retrieves label for value x. Retrieves x if
244
+ # no label defined.
233
245
  def labeling(x)
234
246
  @labels.has_key?(x) ? @labels[x].to_s : x.to_s
235
247
  end
236
- # Returns a Vector with the data with labels replaced by the label.
248
+ # Returns a Vector with data with labels replaced by the label.
237
249
  def vector_labeled
238
250
  d=@data.collect{|x|
239
251
  if @labels.has_key? x
@@ -273,11 +285,11 @@ class Vector
273
285
  @type=t
274
286
  set_scale_data if(t==:scale)
275
287
  end
276
-
277
288
  def to_a
278
289
  @data.dup
279
290
  end
280
291
  alias_method :to_ary, :to_a
292
+
281
293
  # Vector sum.
282
294
  # - If v is a scalar, add this value to all elements
283
295
  # - If v is a Array or a Vector, should be of the same size of this vector
@@ -296,8 +308,8 @@ class Vector
296
308
  def -(v)
297
309
  _vector_ari("-",v)
298
310
  end
299
- # Reports all values that doesn't comply with a condition
300
- # Returns a hash with the index of data and the invalid data
311
+ # Reports all values that don't comply with a condition.
312
+ # Returns a hash with the index of data and the invalid data.
301
313
  def verify
302
314
  h={}
303
315
  (0...@data.size).to_a.each{|i|
@@ -401,7 +413,7 @@ class Vector
401
413
  # only with valid data.
402
414
  #
403
415
  # In all the trials, every item have the same probability
404
- # of been selected
416
+ # of being selected.
405
417
  def sample_with_replacement(sample=1)
406
418
  if(@type!=:scale or !HAS_GSL)
407
419
  vds=@valid_data.size
@@ -414,8 +426,9 @@ class Vector
414
426
  # Returns a random sample of size n, without replacement,
415
427
  # only with valid data.
416
428
  #
417
- # Every element could only be selected once
418
- # A sample of the same size of the vector is the vector itself
429
+ # Every element could only be selected once.
430
+ #
431
+ # A sample of the same size of the vector is the vector itself.
419
432
 
420
433
  def sample_without_replacement(sample=1)
421
434
  if(@type!=:scale or !HAS_GSL)
@@ -432,6 +445,11 @@ class Vector
432
445
  r.choose(@gsl, sample).to_a
433
446
  end
434
447
  end
448
+ # Retrieves number of cases which comply with a condition.
449
+ # If block given, retrieves number of instances where
450
+ # block returns true.
451
+ # If other values given, retrieves the frequency for
452
+ # this value.
435
453
  def count(x=false)
436
454
  if block_given?
437
455
  r=@data.inject(0) {|s, i|
@@ -443,7 +461,8 @@ class Vector
443
461
  frequencies[x].nil? ? 0 : frequencies[x]
444
462
  end
445
463
  end
446
- # returns the database type for the vector, according to its content
464
+
465
+ # Returns the database type for the vector, according to its content
447
466
 
448
467
  def db_type(dbs='mysql')
449
468
  # first, detect any character not number
@@ -465,11 +484,12 @@ class Vector
465
484
  true
466
485
  end
467
486
  end
487
+
468
488
  def to_s
469
489
  sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
470
490
  end
471
491
  # Ugly name. Really, create a Vector for standard 'matrix' package.
472
- # <tt>dir</tt> could. be :horizontal or :vertical
492
+ # <tt>dir</tt> could be :horizontal or :vertical
473
493
  def to_matrix(dir=:horizontal)
474
494
  case dir
475
495
  when :horizontal
@@ -481,9 +501,7 @@ class Vector
481
501
  def inspect
482
502
  self.to_s
483
503
  end
484
- def as_r
485
- @data.dup
486
- end
504
+ # Retrieves unique values for data.
487
505
  def factors
488
506
  if @type==:scale
489
507
  @scale_data.uniq.sort
@@ -492,17 +510,17 @@ class Vector
492
510
  end
493
511
  end
494
512
  if Statsample::STATSAMPLE__.respond_to?(:frequencies)
495
- # Returns a hash with the distribution of frecuencies of
513
+ # Returns a hash with the distribution of frequencies for
496
514
  # the sample
497
515
  def frequencies
498
516
  Statsample::STATSAMPLE__.frequencies(@valid_data)
499
517
  end
500
518
  else
501
- def frequencies
519
+ def frequencies #:nodoc:
502
520
  _frequencies
503
521
  end
504
522
  end
505
- def _frequencies
523
+ def _frequencies #:nodoc:
506
524
  @valid_data.inject(Hash.new) {|a,x|
507
525
  a[x]||=0
508
526
  a[x]=a[x]+1
@@ -589,7 +607,8 @@ class Vector
589
607
  end
590
608
  def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
591
609
  Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
592
- end
610
+ end
611
+
593
612
  self.instance_methods.find_all{|met| met=~/_slow$/}.each{|met|
594
613
  met_or=met.gsub("_slow","")
595
614
  if !self.method_defined?(met_or)
@@ -672,8 +691,7 @@ class Vector
672
691
  # The arithmetical mean of data
673
692
  def mean
674
693
  check_type :scale
675
-
676
- sum.to_f.quo(n_valid)
694
+ sum.to_f.quo(n_valid)
677
695
  end
678
696
  # Sum of squares for the data around a value.
679
697
  # By default, this value is the mean
@@ -31,8 +31,8 @@ class StatsampleCombinationTestCase < Test::Unit::TestCase
31
31
  rb.each{|y|
32
32
  rb_array.push(y)
33
33
  }
34
- assert(gsl.d.is_a? Statsample::Combination::CombinationGsl)
35
- assert(rb.d.is_a? Statsample::Combination::CombinationRuby)
34
+ assert(gsl.d.is_a?(Statsample::Combination::CombinationGsl))
35
+ assert(rb.d.is_a?(Statsample::Combination::CombinationRuby))
36
36
 
37
37
  assert_equal(rb_array,gsl_array)
38
38
  else
@@ -4,7 +4,7 @@ require 'test/unit'
4
4
  begin
5
5
  require 'rbgsl'
6
6
  NOT_GSL=false
7
- rescue
7
+ rescue LoadError
8
8
  NOT_GSL=true
9
9
  end
10
10
  class DistributionTestCase < Test::Unit::TestCase
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsample
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-10 00:00:00 -04:00
12
+ date: 2009-09-12 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency