statsample 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,8 @@
1
+ === 0.4.1 / 2009-09-12
2
+ * More methods and usage documentation
3
+ * Logit tests
4
+ * Bug fix: rescue clauses around requires now specify LoadError
5
+ * Binomial::BaseEngine new methods: coeffs_se, coeffs, constant and constant_se
1
6
  === 0.4.0 / 2009-09-10
2
7
  * New Distribution module, based on statistics2.rb by Shin-ichiro HARA. Replaces all instances of GSL distributions pdf and cdf calculations for native calculation.
3
8
  * New Maximum Likelihood Estimation for Logit, Probit and Normal Distribution using Von Tessin(2005) algorithm. See MLE class and subclasses for more information.
data/lib/distribution.rb CHANGED
@@ -1,4 +1,12 @@
1
1
  require 'statistics2'
2
+ # Several distributions modules to calculate cdf, inverse cdf and pdf
3
+ # See Distribution::Pdf for interface.
4
+ #
5
+ # Usage:
6
+ # Distribution::Normal.cdf(1.96)
7
+ # => 0.97500210485178
8
+ # Distribution::Normal.p_value(0.95)
9
+ # => 1.64485364660836
2
10
  module Distribution
3
11
  autoload(:ChiSquare, 'distribution/chisquare')
4
12
  autoload(:T, 'distribution/t')
@@ -1,8 +1,7 @@
1
1
  module Distribution
2
2
  # Calculate cdf and inverse cdf for Chi Square Distribution.
3
3
  #
4
- # Based on Babatunde, Iyiola & Eni () :
5
- # "A Numerical Procedure for Computing Chi-Square Percentage Points"
4
+ # Based on Statistics2 module
6
5
  #
7
6
  module ChiSquare
8
7
  class << self
data/lib/statsample.rb CHANGED
@@ -48,13 +48,13 @@ end
48
48
  begin
49
49
  require 'gettext'
50
50
  rescue LoadError
51
- def bindtextdomain(d)
51
+ def bindtextdomain(d) #:nodoc:
52
52
  d
53
53
  end
54
54
 
55
55
  # Bored module
56
- module GetText
57
- def _(t)
56
+ module GetText #:nodoc:
57
+ def _(t)
58
58
  t
59
59
  end
60
60
  end
@@ -81,7 +81,7 @@ end
81
81
  #
82
82
  module Statsample
83
83
 
84
- VERSION = '0.4.0'
84
+ VERSION = '0.4.1'
85
85
  SPLIT_TOKEN = ","
86
86
  autoload(:Database, 'statsample/converters')
87
87
  autoload(:Anova, 'statsample/anova')
@@ -90,9 +90,9 @@ module Statsample
90
90
  autoload(:PlainText, 'statsample/converters')
91
91
  autoload(:Excel, 'statsample/converters')
92
92
  autoload(:GGobi, 'statsample/converters')
93
- autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
93
+ autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
94
94
  autoload(:HtmlReport, 'statsample/htmlreport')
95
- autoload(:Mx, 'statsample/converters')
95
+ autoload(:Mx, 'statsample/converters')
96
96
  autoload(:Resample, 'statsample/resample')
97
97
  autoload(:SRS, 'statsample/srs')
98
98
  autoload(:Codification, 'statsample/codification')
@@ -193,7 +193,7 @@ module Statsample
193
193
  end
194
194
 
195
195
  g=general_averages
196
- t.add_horizontal_line
196
+ t.add_horizontal_line
197
197
 
198
198
  row=[_("Overall averages"),"",""]+@fields.collect{|f|
199
199
  sprintf("%0.3f",g[f])
@@ -69,7 +69,7 @@ class DominanceAnalysis
69
69
  out.extend report_type
70
70
  out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
71
71
  out.add _("Sample size: %d\n") % @n_samples
72
- t=Distribution::T.p_value(1-((1-alfa) / 2),@n_samples - 1)
72
+ t=Distribution::T.p_value(1-((1-alfa) / 2), @n_samples - 1)
73
73
  out.add "t:#{t}\n"
74
74
  out.add "Linear Regression Engine: #{@lr_class.name}"
75
75
  out.nl
@@ -89,8 +89,8 @@ end
89
89
 
90
90
  # replaces all key and fill classes with similar ones, without opacity
91
91
  # this allows rendering of svg and png on rox and gqview without problems
92
- module SVG
93
- module Graph
92
+ module SVG #:nodoc:
93
+ module Graph
94
94
  class BarNoOp < Bar
95
95
  def get_css; SVG::Graph.get_css_standard; end
96
96
  end
@@ -56,8 +56,8 @@ module Statsample
56
56
  end
57
57
  # Creates a zero matrix Mx1, with M=x.M
58
58
  def set_default_parameters(x)
59
- fd=x.column_size.times.collect{|i| 0.0}
60
- fd.push(0.1) if self.is_a? Statsample::MLE::Normal
59
+ fd=[0.0]*x.column_size
60
+ fd.push(0.1) if self.is_a? Statsample::MLE::Normal
61
61
  parameters = Matrix.columns([fd])
62
62
  end
63
63
 
@@ -2,13 +2,39 @@
2
2
  module Statsample
3
3
  module Regression
4
4
  module Binomial
5
+ # Create a Logit model object.
6
+ # ds:: Dataset
7
+ # y:: Name of dependent vector
8
+ # Use
9
+ # dataset=Statsample::CSV.read("data.csv")
10
+ # y="y"
11
+ # lr=Statsample::Regression::Binomial.logit(dataset,y)
12
+ #
5
13
  def self.logit(ds,y_var)
6
14
  Logit.new(ds,y_var)
7
15
  end
16
+ # Create a Probit model object.
17
+ # ds:: Dataset
18
+ # y:: Name of dependent vector
19
+ # Use
20
+ # dataset=Statsample::CSV.read("data.csv")
21
+ # y="y"
22
+ # lr=Statsample::Regression::Binomial.probit(dataset,y)
23
+ #
24
+
8
25
  def self.probit(ds,y_var)
9
26
  Probit.new(ds,y_var)
10
27
  end
11
-
28
+ # Base Engine for binomial regression analysis.
29
+ # See Statsample::Regression::Binomial.logit() and
30
+ # Statsample::Regression::Binomial.probit for fast
31
+ # access methods.
32
+ #
33
+ # Use:
34
+ # dataset=Statsample::CSV.read("data.csv")
35
+ # y="y"
36
+ # model=Statsample::MLE::Logit.new
37
+ # lr=Statsample::Regression::Binomial::BaseEngine.new(dataset, y, model)
12
38
  class BaseEngine
13
39
  attr_reader :log_likehood, :iterations
14
40
  def initialize(ds,y_var,model)
@@ -6,10 +6,10 @@ end
6
6
 
7
7
  module Statsample
8
8
  class << self
9
- # Create a matrix using vectors as columns
9
+ # Create a matrix using vectors as columns.
10
10
  # Use:
11
11
  #
12
- # matrix=Statsample.vector_cols_matrix(v1,v2)
12
+ # matrix=Statsample.vector_cols_matrix(v1,v2)
13
13
  def vector_cols_matrix(*vs)
14
14
  # test
15
15
  size=vs[0].size
@@ -23,7 +23,7 @@ module Statsample
23
23
  end
24
24
  end
25
25
  # Returns a duplicate of the input vectors, without missing data
26
- # for any of the vectors
26
+ # for any of the vectors.
27
27
  #
28
28
  # a=[1,2,3,6,7,nil,3,5].to_vector(:scale)
29
29
  # b=[nil,nil,5,6,4,5,10,2].to_vector(:scale)
@@ -89,8 +89,8 @@ class Vector
89
89
  def dup
90
90
  Vector.new(@data.dup,@type,@missing_values.dup,@labels.dup)
91
91
  end
92
- # Returns an empty duplicate of the vector. Maintains the type, missing
93
- # values, labels
92
+ # Returns an empty duplicate of the vector. Maintains the type,
93
+ # missing values and labels.
94
94
  def dup_empty
95
95
  Vector.new([],@type,@missing_values.dup,@labels.dup)
96
96
  end
@@ -123,7 +123,7 @@ class Vector
123
123
 
124
124
  alias_method :standarized, :vector_standarized
125
125
 
126
- def box_cox_transformation(lambda)
126
+ def box_cox_transformation(lambda) # :nodoc:
127
127
  raise "Should be a scale" unless @type==:scale
128
128
  @data_with_nils.collect{|x|
129
129
  if !x.nil?
@@ -162,21 +162,20 @@ class Vector
162
162
  end
163
163
  # Modifies current vector, with data modified by block.
164
164
  # Equivalent to #collect! on @data
165
-
166
165
  def recode!
167
166
  @data.collect!{|x|
168
167
  yield x
169
168
  }
170
169
  set_valid_data
171
170
  end
172
- # Iterate on each item
171
+ # Iterate on each item.
173
172
  # Equivalent to
174
173
  # @data.each{|x| yield x}
175
174
  def each
176
175
  @data.each{|x| yield(x) }
177
176
  end
178
177
 
179
- # Iterate on each item_index
178
+ # Iterate on each item, retrieving index
180
179
 
181
180
  def each_index
182
181
  (0...@data.size).each {|i|
@@ -185,16 +184,27 @@ class Vector
185
184
  end
186
185
  # Add a value at the end of the vector.
187
186
  # If second argument set to false, you should update the Vector using
188
- # Vector#set_valid_data at the end of your insertion cycle
187
+ # Vector.set_valid_data at the end of your insertion cycle
189
188
  #
190
189
  def add(v,update_valid=true)
191
190
  @data.push(v)
192
191
  set_valid_data if update_valid
193
192
  end
194
193
  # Update valid_data, missing_data, data_with_nils and gsl
195
- # at the end of an insertion
194
+ # at the end of an insertion.
196
195
  #
197
- # Use after add(v,false)
196
+ # Use after Vector.add(v,false)
197
+ # Usage:
198
+ # v=Statsample::Vector.new
199
+ # v.add(2,false)
200
+ # v.add(4,false)
201
+ # v.data
202
+ # => [2,4]
203
+ # v.valid_data
204
+ # => []
205
+ # v.set_valid_data
206
+ # v.valid_data
207
+ # => [2,4]
198
208
  def set_valid_data
199
209
  @valid_data.clear
200
210
  @missing_data.clear
@@ -203,17 +213,17 @@ class Vector
203
213
  set_valid_data_intern
204
214
  set_scale_data if(@type==:scale)
205
215
  end
206
-
216
+
207
217
  if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
208
- def set_valid_data_intern
218
+ def set_valid_data_intern #:nodoc:
209
219
  Statsample::STATSAMPLE__.set_valid_data_intern(self)
210
220
  end
211
221
  else
212
- def set_valid_data_intern
222
+ def set_valid_data_intern #:nodoc:
213
223
  _set_valid_data_intern
214
224
  end
215
225
  end
216
- def _set_valid_data_intern
226
+ def _set_valid_data_intern #:nodoc:
217
227
  @data.each do |n|
218
228
  if is_valid? n
219
229
  @valid_data.push(n)
@@ -225,15 +235,17 @@ class Vector
225
235
  end
226
236
  @has_missing_data=@missing_data.size>0
227
237
  end
228
-
238
+
229
239
  # Returns true if data has one or more missing values
230
240
  def has_missing_data?
231
241
  @has_missing_data
232
242
  end
243
+ # Retrieves label for value x. Retrieves x if
244
+ # no label defined.
233
245
  def labeling(x)
234
246
  @labels.has_key?(x) ? @labels[x].to_s : x.to_s
235
247
  end
236
- # Returns a Vector with the data with labels replaced by the label.
248
+ # Returns a Vector with data with labels replaced by the label.
237
249
  def vector_labeled
238
250
  d=@data.collect{|x|
239
251
  if @labels.has_key? x
@@ -273,11 +285,11 @@ class Vector
273
285
  @type=t
274
286
  set_scale_data if(t==:scale)
275
287
  end
276
-
277
288
  def to_a
278
289
  @data.dup
279
290
  end
280
291
  alias_method :to_ary, :to_a
292
+
281
293
  # Vector sum.
282
294
  # - If v is a scalar, add this value to all elements
283
295
  # - If v is a Array or a Vector, should be of the same size of this vector
@@ -296,8 +308,8 @@ class Vector
296
308
  def -(v)
297
309
  _vector_ari("-",v)
298
310
  end
299
- # Reports all values that doesn't comply with a condition
300
- # Returns a hash with the index of data and the invalid data
311
+ # Reports all values that don't comply with a condition.
312
+ # Returns a hash with the index of data and the invalid data.
301
313
  def verify
302
314
  h={}
303
315
  (0...@data.size).to_a.each{|i|
@@ -401,7 +413,7 @@ class Vector
401
413
  # only with valid data.
402
414
  #
403
415
  # In all the trials, every item has the same probability
404
- # of been selected
416
+ # of being selected.
405
417
  def sample_with_replacement(sample=1)
406
418
  if(@type!=:scale or !HAS_GSL)
407
419
  vds=@valid_data.size
@@ -414,8 +426,9 @@ class Vector
414
426
  # Returns an random sample of size n, without replacement,
415
427
  # only with valid data.
416
428
  #
417
- # Every element could only be selected once
418
- # A sample of the same size of the vector is the vector itself
429
+ # Every element could only be selected once.
430
+ #
431
+ # A sample of the same size of the vector is the vector itself.
419
432
 
420
433
  def sample_without_replacement(sample=1)
421
434
  if(@type!=:scale or !HAS_GSL)
@@ -432,6 +445,11 @@ class Vector
432
445
  r.choose(@gsl, sample).to_a
433
446
  end
434
447
  end
448
+ # Retrieves number of cases which comply condition.
449
+ # If block given, retrieves number of instances where
450
+ # block returns true.
451
+ # If other values given, retrieves the frequency for
452
+ # this value.
435
453
  def count(x=false)
436
454
  if block_given?
437
455
  r=@data.inject(0) {|s, i|
@@ -443,7 +461,8 @@ class Vector
443
461
  frequencies[x].nil? ? 0 : frequencies[x]
444
462
  end
445
463
  end
446
- # returns the database type for the vector, according to its content
464
+
465
+ # Returns the database type for the vector, according to its content
447
466
 
448
467
  def db_type(dbs='mysql')
449
468
  # first, detect any character not number
@@ -465,11 +484,12 @@ class Vector
465
484
  true
466
485
  end
467
486
  end
487
+
468
488
  def to_s
469
489
  sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
470
490
  end
471
491
  # Ugly name. Really, create a Vector for standard 'matrix' package.
472
- # <tt>dir</tt> could. be :horizontal or :vertical
492
+ # <tt>dir</tt> could be :horizontal or :vertical
473
493
  def to_matrix(dir=:horizontal)
474
494
  case dir
475
495
  when :horizontal
@@ -481,9 +501,7 @@ class Vector
481
501
  def inspect
482
502
  self.to_s
483
503
  end
484
- def as_r
485
- @data.dup
486
- end
504
+ # Retrieves unique values for data.
487
505
  def factors
488
506
  if @type==:scale
489
507
  @scale_data.uniq.sort
@@ -492,17 +510,17 @@ class Vector
492
510
  end
493
511
  end
494
512
  if Statsample::STATSAMPLE__.respond_to?(:frequencies)
495
- # Returns a hash with the distribution of frecuencies of
513
+ # Returns a hash with the distribution of frequencies for
496
514
  # the sample
497
515
  def frequencies
498
516
  Statsample::STATSAMPLE__.frequencies(@valid_data)
499
517
  end
500
518
  else
501
- def frequencies
519
+ def frequencies #:nodoc:
502
520
  _frequencies
503
521
  end
504
522
  end
505
- def _frequencies
523
+ def _frequencies #:nodoc:
506
524
  @valid_data.inject(Hash.new) {|a,x|
507
525
  a[x]||=0
508
526
  a[x]=a[x]+1
@@ -589,7 +607,8 @@ class Vector
589
607
  end
590
608
  def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
591
609
  Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
592
- end
610
+ end
611
+
593
612
  self.instance_methods.find_all{|met| met=~/_slow$/}.each{|met|
594
613
  met_or=met.gsub("_slow","")
595
614
  if !self.method_defined?(met_or)
@@ -672,8 +691,7 @@ class Vector
672
691
  # The arithmetical mean of data
673
692
  def mean
674
693
  check_type :scale
675
-
676
- sum.to_f.quo(n_valid)
694
+ sum.to_f.quo(n_valid)
677
695
  end
678
696
  # Sum of squares for the data around a value.
679
697
  # By default, this value is the mean
@@ -31,8 +31,8 @@ class StatsampleCombinationTestCase < Test::Unit::TestCase
31
31
  rb.each{|y|
32
32
  rb_array.push(y)
33
33
  }
34
- assert(gsl.d.is_a? Statsample::Combination::CombinationGsl)
35
- assert(rb.d.is_a? Statsample::Combination::CombinationRuby)
34
+ assert(gsl.d.is_a?(Statsample::Combination::CombinationGsl))
35
+ assert(rb.d.is_a?(Statsample::Combination::CombinationRuby))
36
36
 
37
37
  assert_equal(rb_array,gsl_array)
38
38
  else
@@ -4,7 +4,7 @@ require 'test/unit'
4
4
  begin
5
5
  require 'rbgsl'
6
6
  NOT_GSL=false
7
- rescue
7
+ rescue LoadError
8
8
  NOT_GSL=true
9
9
  end
10
10
  class DistributionTestCase < Test::Unit::TestCase
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: statsample
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Claudio Bustos
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-09-10 00:00:00 -04:00
12
+ date: 2009-09-12 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency