statsample 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/lib/distribution.rb +8 -0
- data/lib/distribution/chisquare.rb +1 -2
- data/lib/statsample.rb +6 -6
- data/lib/statsample/dominanceanalysis.rb +1 -1
- data/lib/statsample/dominanceanalysis/bootstrap.rb +1 -1
- data/lib/statsample/graph/svggraph.rb +2 -2
- data/lib/statsample/mle.rb +2 -2
- data/lib/statsample/regression/binomial.rb +27 -1
- data/lib/statsample/vector.rb +53 -35
- data/test/test_combination.rb +2 -2
- data/test/test_distribution.rb +1 -1
- metadata +2 -2
data/History.txt
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
=== 0.4.1 / 2009-09-12
|
2
|
+
* More methods and usage documentation
|
3
|
+
* Logit tests
|
4
|
+
* Bug fix: rescue for requires doesn't specify LoadError
|
5
|
+
* Binomial::BaseEngine new methods: coeffs_se, coeffs, constant and constant_se
|
1
6
|
=== 0.4.0 / 2009-09-10
|
2
7
|
* New Distribution module, based on statistics2.rb by Shin-ichiro HARA. Replaces all instances of GSL distributions pdf and cdf calculations for native calculation.
|
3
8
|
* New Maximum Likelihood Estimation for Logit, Probit and Normal Distribution using Von Tessin (2005) algorithm. See MLE class and subclasses for more information.
|
data/lib/distribution.rb
CHANGED
@@ -1,4 +1,12 @@
|
|
1
1
|
require 'statistics2'
|
2
|
+
# Several distribution modules to calculate cdf, inverse cdf and pdf
|
3
|
+
# See Distribution::Pdf for interface.
|
4
|
+
#
|
5
|
+
# Usage:
|
6
|
+
# Distribution::Normal.cdf(1.96)
|
7
|
+
# => 0.97500210485178
|
8
|
+
# Distribution::Normal.p_value(0.95)
|
9
|
+
# => 1.64485364660836
|
2
10
|
module Distribution
|
3
11
|
autoload(:ChiSquare, 'distribution/chisquare')
|
4
12
|
autoload(:T, 'distribution/t')
|
@@ -1,8 +1,7 @@
|
|
1
1
|
module Distribution
|
2
2
|
# Calculate cdf and inverse cdf for Chi Square Distribution.
|
3
3
|
#
|
4
|
-
# Based on
|
5
|
-
# "A Numerical Procedure for Computing Chi-Square Percentage Points"
|
4
|
+
# Based on Statistics2 module
|
6
5
|
#
|
7
6
|
module ChiSquare
|
8
7
|
class << self
|
data/lib/statsample.rb
CHANGED
@@ -48,13 +48,13 @@ end
|
|
48
48
|
begin
|
49
49
|
require 'gettext'
|
50
50
|
rescue LoadError
|
51
|
-
def bindtextdomain(d)
|
51
|
+
def bindtextdomain(d) #:nodoc:
|
52
52
|
d
|
53
53
|
end
|
54
54
|
|
55
55
|
# Bored module
|
56
|
-
module GetText
|
57
|
-
def _(t)
|
56
|
+
module GetText #:nodoc:
|
57
|
+
def _(t)
|
58
58
|
t
|
59
59
|
end
|
60
60
|
end
|
@@ -81,7 +81,7 @@ end
|
|
81
81
|
#
|
82
82
|
module Statsample
|
83
83
|
|
84
|
-
VERSION = '0.4.
|
84
|
+
VERSION = '0.4.1'
|
85
85
|
SPLIT_TOKEN = ","
|
86
86
|
autoload(:Database, 'statsample/converters')
|
87
87
|
autoload(:Anova, 'statsample/anova')
|
@@ -90,9 +90,9 @@ module Statsample
|
|
90
90
|
autoload(:PlainText, 'statsample/converters')
|
91
91
|
autoload(:Excel, 'statsample/converters')
|
92
92
|
autoload(:GGobi, 'statsample/converters')
|
93
|
-
|
93
|
+
autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
|
94
94
|
autoload(:HtmlReport, 'statsample/htmlreport')
|
95
|
-
|
95
|
+
autoload(:Mx, 'statsample/converters')
|
96
96
|
autoload(:Resample, 'statsample/resample')
|
97
97
|
autoload(:SRS, 'statsample/srs')
|
98
98
|
autoload(:Codification, 'statsample/codification')
|
@@ -69,7 +69,7 @@ class DominanceAnalysis
|
|
69
69
|
out.extend report_type
|
70
70
|
out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
|
71
71
|
out.add _("Sample size: %d\n") % @n_samples
|
72
|
-
t=Distribution::T.p_value(1-((1-alfa) / 2)
|
72
|
+
t=Distribution::T.p_value(1-((1-alfa) / 2), @n_samples - 1)
|
73
73
|
out.add "t:#{t}\n"
|
74
74
|
out.add "Linear Regression Engine: #{@lr_class.name}"
|
75
75
|
out.nl
|
@@ -89,8 +89,8 @@ end
|
|
89
89
|
|
90
90
|
# replaces all key and fill classes with similar ones, without opacity
|
91
91
|
# this allows rendering of svg and png on rox and gqview without problems
|
92
|
-
module SVG
|
93
|
-
module Graph
|
92
|
+
module SVG #:nodoc:
|
93
|
+
module Graph
|
94
94
|
class BarNoOp < Bar
|
95
95
|
def get_css; SVG::Graph.get_css_standard; end
|
96
96
|
end
|
data/lib/statsample/mle.rb
CHANGED
@@ -56,8 +56,8 @@ module Statsample
|
|
56
56
|
end
|
57
57
|
# Creates a zero matrix Mx1, with M=x.M
|
58
58
|
def set_default_parameters(x)
|
59
|
-
fd=x.column_size
|
60
|
-
|
59
|
+
fd=[0.0]*x.column_size
|
60
|
+
fd.push(0.1) if self.is_a? Statsample::MLE::Normal
|
61
61
|
parameters = Matrix.columns([fd])
|
62
62
|
end
|
63
63
|
|
@@ -2,13 +2,39 @@
|
|
2
2
|
module Statsample
|
3
3
|
module Regression
|
4
4
|
module Binomial
|
5
|
+
# Create a Logit model object.
|
6
|
+
# ds:: Dataset
|
7
|
+
# y:: Name of dependent vector
|
8
|
+
# Use
|
9
|
+
# dataset=Statsample::CSV.read("data.csv")
|
10
|
+
# y="y"
|
11
|
+
# lr=Statsample::Regression::Binomial.logit(dataset,y)
|
12
|
+
#
|
5
13
|
def self.logit(ds,y_var)
|
6
14
|
Logit.new(ds,y_var)
|
7
15
|
end
|
16
|
+
# Create a Probit model object.
|
17
|
+
# ds:: Dataset
|
18
|
+
# y:: Name of dependent vector
|
19
|
+
# Use
|
20
|
+
# dataset=Statsample::CSV.read("data.csv")
|
21
|
+
# y="y"
|
22
|
+
# lr=Statsample::Regression::Binomial.probit(dataset,y)
|
23
|
+
#
|
24
|
+
|
8
25
|
def self.probit(ds,y_var)
|
9
26
|
Probit.new(ds,y_var)
|
10
27
|
end
|
11
|
-
|
28
|
+
# Base Engine for binomial regression analysis.
|
29
|
+
# See Statsample::Regression::Binomial.logit() and
|
30
|
+
# Statsample::Regression::Binomial.probit for fast
|
31
|
+
# access methods.
|
32
|
+
#
|
33
|
+
# Use:
|
34
|
+
# dataset=Statsample::CSV.read("data.csv")
|
35
|
+
# y="y"
|
36
|
+
# model=Statsample::MLE::Logit.new
|
37
|
+
# lr=Statsample::Regression::Binomial::BaseEngine.new(dataset, y, model)
|
12
38
|
class BaseEngine
|
13
39
|
attr_reader :log_likehood, :iterations
|
14
40
|
def initialize(ds,y_var,model)
|
data/lib/statsample/vector.rb
CHANGED
@@ -6,10 +6,10 @@ end
|
|
6
6
|
|
7
7
|
module Statsample
|
8
8
|
class << self
|
9
|
-
# Create a matrix using vectors as columns
|
9
|
+
# Create a matrix using vectors as columns.
|
10
10
|
# Use:
|
11
11
|
#
|
12
|
-
#
|
12
|
+
# matrix=Statsample.vector_cols_matrix(v1,v2)
|
13
13
|
def vector_cols_matrix(*vs)
|
14
14
|
# test
|
15
15
|
size=vs[0].size
|
@@ -23,7 +23,7 @@ module Statsample
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
# Returns a duplicate of the input vectors, without missing data
|
26
|
-
# for any of the vectors
|
26
|
+
# for any of the vectors.
|
27
27
|
#
|
28
28
|
# a=[1,2,3,6,7,nil,3,5].to_vector(:scale)
|
29
29
|
# b=[nil,nil,5,6,4,5,10,2].to_vector(:scale)
|
@@ -89,8 +89,8 @@ class Vector
|
|
89
89
|
def dup
|
90
90
|
Vector.new(@data.dup,@type,@missing_values.dup,@labels.dup)
|
91
91
|
end
|
92
|
-
# Returns an empty duplicate of the vector. Maintains the type,
|
93
|
-
# values
|
92
|
+
# Returns an empty duplicate of the vector. Maintains the type,
|
93
|
+
# missing values and labels.
|
94
94
|
def dup_empty
|
95
95
|
Vector.new([],@type,@missing_values.dup,@labels.dup)
|
96
96
|
end
|
@@ -123,7 +123,7 @@ class Vector
|
|
123
123
|
|
124
124
|
alias_method :standarized, :vector_standarized
|
125
125
|
|
126
|
-
def box_cox_transformation(lambda)
|
126
|
+
def box_cox_transformation(lambda) # :nodoc:
|
127
127
|
raise "Should be a scale" unless @type==:scale
|
128
128
|
@data_with_nils.collect{|x|
|
129
129
|
if !x.nil?
|
@@ -162,21 +162,20 @@ class Vector
|
|
162
162
|
end
|
163
163
|
# Modifies current vector, with data modified by block.
|
164
164
|
# Equivalent to #collect! on @data
|
165
|
-
|
166
165
|
def recode!
|
167
166
|
@data.collect!{|x|
|
168
167
|
yield x
|
169
168
|
}
|
170
169
|
set_valid_data
|
171
170
|
end
|
172
|
-
# Iterate on each item
|
171
|
+
# Iterate on each item.
|
173
172
|
# Equivalent to
|
174
173
|
# @data.each{|x| yield x}
|
175
174
|
def each
|
176
175
|
@data.each{|x| yield(x) }
|
177
176
|
end
|
178
177
|
|
179
|
-
# Iterate on each
|
178
|
+
# Iterate on each item, retrieving index
|
180
179
|
|
181
180
|
def each_index
|
182
181
|
(0...@data.size).each {|i|
|
@@ -185,16 +184,27 @@ class Vector
|
|
185
184
|
end
|
186
185
|
# Add a value at the end of the vector.
|
187
186
|
# If second argument set to false, you should update the Vector using
|
188
|
-
# Vector
|
187
|
+
# Vector.set_valid_data at the end of your insertion cycle
|
189
188
|
#
|
190
189
|
def add(v,update_valid=true)
|
191
190
|
@data.push(v)
|
192
191
|
set_valid_data if update_valid
|
193
192
|
end
|
194
193
|
# Update valid_data, missing_data, data_with_nils and gsl
|
195
|
-
# at the end of an insertion
|
194
|
+
# at the end of an insertion.
|
196
195
|
#
|
197
|
-
# Use after add(v,false)
|
196
|
+
# Use after Vector.add(v,false)
|
197
|
+
# Usage:
|
198
|
+
# v=Statsample::Vector.new
|
199
|
+
# v.add(2,false)
|
200
|
+
# v.add(4,false)
|
201
|
+
# v.data
|
202
|
+
# => [2,4]
|
203
|
+
# v.valid_data
|
204
|
+
# => []
|
205
|
+
# v.set_valid_data
|
206
|
+
# v.valid_data
|
207
|
+
# => [2,4]
|
198
208
|
def set_valid_data
|
199
209
|
@valid_data.clear
|
200
210
|
@missing_data.clear
|
@@ -203,17 +213,17 @@ class Vector
|
|
203
213
|
set_valid_data_intern
|
204
214
|
set_scale_data if(@type==:scale)
|
205
215
|
end
|
206
|
-
|
216
|
+
|
207
217
|
if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
|
208
|
-
def set_valid_data_intern
|
218
|
+
def set_valid_data_intern #:nodoc:
|
209
219
|
Statsample::STATSAMPLE__.set_valid_data_intern(self)
|
210
220
|
end
|
211
221
|
else
|
212
|
-
def set_valid_data_intern
|
222
|
+
def set_valid_data_intern #:nodoc:
|
213
223
|
_set_valid_data_intern
|
214
224
|
end
|
215
225
|
end
|
216
|
-
def _set_valid_data_intern
|
226
|
+
def _set_valid_data_intern #:nodoc:
|
217
227
|
@data.each do |n|
|
218
228
|
if is_valid? n
|
219
229
|
@valid_data.push(n)
|
@@ -225,15 +235,17 @@ class Vector
|
|
225
235
|
end
|
226
236
|
@has_missing_data=@missing_data.size>0
|
227
237
|
end
|
228
|
-
|
238
|
+
|
229
239
|
# Returns true if data has one or more missing values
|
230
240
|
def has_missing_data?
|
231
241
|
@has_missing_data
|
232
242
|
end
|
243
|
+
# Retrieves label for value x. Retrieves x if
|
244
|
+
# no label defined.
|
233
245
|
def labeling(x)
|
234
246
|
@labels.has_key?(x) ? @labels[x].to_s : x.to_s
|
235
247
|
end
|
236
|
-
# Returns a Vector with
|
248
|
+
# Returns a Vector with data with labels replaced by the label.
|
237
249
|
def vector_labeled
|
238
250
|
d=@data.collect{|x|
|
239
251
|
if @labels.has_key? x
|
@@ -273,11 +285,11 @@ class Vector
|
|
273
285
|
@type=t
|
274
286
|
set_scale_data if(t==:scale)
|
275
287
|
end
|
276
|
-
|
277
288
|
def to_a
|
278
289
|
@data.dup
|
279
290
|
end
|
280
291
|
alias_method :to_ary, :to_a
|
292
|
+
|
281
293
|
# Vector sum.
|
282
294
|
# - If v is a scalar, add this value to all elements
|
283
295
|
# - If v is a Array or a Vector, should be of the same size of this vector
|
@@ -296,8 +308,8 @@ class Vector
|
|
296
308
|
def -(v)
|
297
309
|
_vector_ari("-",v)
|
298
310
|
end
|
299
|
-
# Reports all values that doesn't comply with a condition
|
300
|
-
# Returns a hash with the index of data and the invalid data
|
311
|
+
# Reports all values that don't comply with a condition.
|
312
|
+
# Returns a hash with the index of data and the invalid data.
|
301
313
|
def verify
|
302
314
|
h={}
|
303
315
|
(0...@data.size).to_a.each{|i|
|
@@ -401,7 +413,7 @@ class Vector
|
|
401
413
|
# only with valid data.
|
402
414
|
#
|
403
415
|
# In all the trials, every item has the same probability
|
404
|
-
# of been selected
|
416
|
+
# of being selected.
|
405
417
|
def sample_with_replacement(sample=1)
|
406
418
|
if(@type!=:scale or !HAS_GSL)
|
407
419
|
vds=@valid_data.size
|
@@ -414,8 +426,9 @@ class Vector
|
|
414
426
|
# Returns a random sample of size n, without replacement,
|
415
427
|
# only with valid data.
|
416
428
|
#
|
417
|
-
# Every element could only be selected once
|
418
|
-
#
|
429
|
+
# Every element could only be selected once.
|
430
|
+
#
|
431
|
+
# A sample of the same size of the vector is the vector itself.
|
419
432
|
|
420
433
|
def sample_without_replacement(sample=1)
|
421
434
|
if(@type!=:scale or !HAS_GSL)
|
@@ -432,6 +445,11 @@ class Vector
|
|
432
445
|
r.choose(@gsl, sample).to_a
|
433
446
|
end
|
434
447
|
end
|
448
|
+
# Retrieves number of cases which comply with a condition.
|
449
|
+
# If block given, retrieves number of instances where
|
450
|
+
# block returns true.
|
451
|
+
# If other values given, retrieves the frequency for
|
452
|
+
# this value.
|
435
453
|
def count(x=false)
|
436
454
|
if block_given?
|
437
455
|
r=@data.inject(0) {|s, i|
|
@@ -443,7 +461,8 @@ class Vector
|
|
443
461
|
frequencies[x].nil? ? 0 : frequencies[x]
|
444
462
|
end
|
445
463
|
end
|
446
|
-
|
464
|
+
|
465
|
+
# Returns the database type for the vector, according to its content
|
447
466
|
|
448
467
|
def db_type(dbs='mysql')
|
449
468
|
# first, detect any character not number
|
@@ -465,11 +484,12 @@ class Vector
|
|
465
484
|
true
|
466
485
|
end
|
467
486
|
end
|
487
|
+
|
468
488
|
def to_s
|
469
489
|
sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
|
470
490
|
end
|
471
491
|
# Ugly name. Really, create a Vector for standard 'matrix' package.
|
472
|
-
# <tt>dir</tt> could
|
492
|
+
# <tt>dir</tt> could be :horizontal or :vertical
|
473
493
|
def to_matrix(dir=:horizontal)
|
474
494
|
case dir
|
475
495
|
when :horizontal
|
@@ -481,9 +501,7 @@ class Vector
|
|
481
501
|
def inspect
|
482
502
|
self.to_s
|
483
503
|
end
|
484
|
-
|
485
|
-
@data.dup
|
486
|
-
end
|
504
|
+
# Retrieves unique values for data.
|
487
505
|
def factors
|
488
506
|
if @type==:scale
|
489
507
|
@scale_data.uniq.sort
|
@@ -492,17 +510,17 @@ class Vector
|
|
492
510
|
end
|
493
511
|
end
|
494
512
|
if Statsample::STATSAMPLE__.respond_to?(:frequencies)
|
495
|
-
# Returns a hash with the distribution of frecuencies
|
513
|
+
# Returns a hash with the distribution of frequencies for
|
496
514
|
# the sample
|
497
515
|
def frequencies
|
498
516
|
Statsample::STATSAMPLE__.frequencies(@valid_data)
|
499
517
|
end
|
500
518
|
else
|
501
|
-
def frequencies
|
519
|
+
def frequencies #:nodoc:
|
502
520
|
_frequencies
|
503
521
|
end
|
504
522
|
end
|
505
|
-
def _frequencies
|
523
|
+
def _frequencies #:nodoc:
|
506
524
|
@valid_data.inject(Hash.new) {|a,x|
|
507
525
|
a[x]||=0
|
508
526
|
a[x]=a[x]+1
|
@@ -589,7 +607,8 @@ class Vector
|
|
589
607
|
end
|
590
608
|
def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
|
591
609
|
Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
|
592
|
-
end
|
610
|
+
end
|
611
|
+
|
593
612
|
self.instance_methods.find_all{|met| met=~/_slow$/}.each{|met|
|
594
613
|
met_or=met.gsub("_slow","")
|
595
614
|
if !self.method_defined?(met_or)
|
@@ -672,8 +691,7 @@ class Vector
|
|
672
691
|
# The arithmetical mean of data
|
673
692
|
def mean
|
674
693
|
check_type :scale
|
675
|
-
|
676
|
-
sum.to_f.quo(n_valid)
|
694
|
+
sum.to_f.quo(n_valid)
|
677
695
|
end
|
678
696
|
# Sum of squares for the data around a value.
|
679
697
|
# By default, this value is the mean
|
data/test/test_combination.rb
CHANGED
@@ -31,8 +31,8 @@ class StatsampleCombinationTestCase < Test::Unit::TestCase
|
|
31
31
|
rb.each{|y|
|
32
32
|
rb_array.push(y)
|
33
33
|
}
|
34
|
-
assert(gsl.d.is_a?
|
35
|
-
assert(rb.d.is_a?
|
34
|
+
assert(gsl.d.is_a?(Statsample::Combination::CombinationGsl))
|
35
|
+
assert(rb.d.is_a?(Statsample::Combination::CombinationRuby))
|
36
36
|
|
37
37
|
assert_equal(rb_array,gsl_array)
|
38
38
|
else
|
data/test/test_distribution.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-09-
|
12
|
+
date: 2009-09-12 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|