statsample 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +5 -0
- data/lib/distribution.rb +8 -0
- data/lib/distribution/chisquare.rb +1 -2
- data/lib/statsample.rb +6 -6
- data/lib/statsample/dominanceanalysis.rb +1 -1
- data/lib/statsample/dominanceanalysis/bootstrap.rb +1 -1
- data/lib/statsample/graph/svggraph.rb +2 -2
- data/lib/statsample/mle.rb +2 -2
- data/lib/statsample/regression/binomial.rb +27 -1
- data/lib/statsample/vector.rb +53 -35
- data/test/test_combination.rb +2 -2
- data/test/test_distribution.rb +1 -1
- metadata +2 -2
data/History.txt
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
=== 0.4.1 / 2009-09-12
|
2
|
+
* More methods and usage documentation
|
3
|
+
* Logit tests
|
4
|
+
* Bug fix: rescue for requires doesn't specify LoadError
|
5
|
+
* Binomial::BaseEngine new methods: coeffs_se, coeffs, constant and constant_se
|
1
6
|
=== 0.4.0 / 2009-09-10
|
2
7
|
* New Distribution module, based on statistics2.rb by Shin-ichiro HARA. Replaces all instances of GSL distributions pdf and cdf calculations for native calculation.
|
3
8
|
* New Maximum Likelihood Estimation for Logit, Probit and Normal Distribution using Von Tessin(2005) algorithm. See MLE class and subclasses for more information.
|
data/lib/distribution.rb
CHANGED
@@ -1,4 +1,12 @@
|
|
1
1
|
require 'statistics2'
|
2
|
+
# Several distributions modules to calculate cdf, inverse cdf and pdf
|
3
|
+
# See Distribution::Pdf for interface.
|
4
|
+
#
|
5
|
+
# Usage:
|
6
|
+
# Distribution::Normal.cdf(1.96)
|
7
|
+
# => 0.97500210485178
|
8
|
+
# Distribution::Normal.p_value(0.95)
|
9
|
+
# => 1.64485364660836
|
2
10
|
module Distribution
|
3
11
|
autoload(:ChiSquare, 'distribution/chisquare')
|
4
12
|
autoload(:T, 'distribution/t')
|
@@ -1,8 +1,7 @@
|
|
1
1
|
module Distribution
|
2
2
|
# Calculate cdf and inverse cdf for Chi Square Distribution.
|
3
3
|
#
|
4
|
-
# Based on
|
5
|
-
# "A Numerical Procedure for Computing Chi-Square Percentage Points"
|
4
|
+
# Based on Statistics2 module
|
6
5
|
#
|
7
6
|
module ChiSquare
|
8
7
|
class << self
|
data/lib/statsample.rb
CHANGED
@@ -48,13 +48,13 @@ end
|
|
48
48
|
begin
|
49
49
|
require 'gettext'
|
50
50
|
rescue LoadError
|
51
|
-
def bindtextdomain(d)
|
51
|
+
def bindtextdomain(d) #:nodoc:
|
52
52
|
d
|
53
53
|
end
|
54
54
|
|
55
55
|
# Bored module
|
56
|
-
module GetText
|
57
|
-
def _(t)
|
56
|
+
module GetText #:nodoc:
|
57
|
+
def _(t)
|
58
58
|
t
|
59
59
|
end
|
60
60
|
end
|
@@ -81,7 +81,7 @@ end
|
|
81
81
|
#
|
82
82
|
module Statsample
|
83
83
|
|
84
|
-
VERSION = '0.4.0'
|
84
|
+
VERSION = '0.4.1'
|
85
85
|
SPLIT_TOKEN = ","
|
86
86
|
autoload(:Database, 'statsample/converters')
|
87
87
|
autoload(:Anova, 'statsample/anova')
|
@@ -90,9 +90,9 @@ module Statsample
|
|
90
90
|
autoload(:PlainText, 'statsample/converters')
|
91
91
|
autoload(:Excel, 'statsample/converters')
|
92
92
|
autoload(:GGobi, 'statsample/converters')
|
93
|
-
|
93
|
+
autoload(:DominanceAnalysis, 'statsample/dominanceanalysis')
|
94
94
|
autoload(:HtmlReport, 'statsample/htmlreport')
|
95
|
-
|
95
|
+
autoload(:Mx, 'statsample/converters')
|
96
96
|
autoload(:Resample, 'statsample/resample')
|
97
97
|
autoload(:SRS, 'statsample/srs')
|
98
98
|
autoload(:Codification, 'statsample/codification')
|
@@ -69,7 +69,7 @@ class DominanceAnalysis
|
|
69
69
|
out.extend report_type
|
70
70
|
out.add _("Summary for Bootstrap Dominance Analysis of %s on %s\n") % [@fields.join(", "), @y_var]
|
71
71
|
out.add _("Sample size: %d\n") % @n_samples
|
72
|
-
t=Distribution::T.p_value(1-((1-alfa) / 2)
|
72
|
+
t=Distribution::T.p_value(1-((1-alfa) / 2), @n_samples - 1)
|
73
73
|
out.add "t:#{t}\n"
|
74
74
|
out.add "Linear Regression Engine: #{@lr_class.name}"
|
75
75
|
out.nl
|
@@ -89,8 +89,8 @@ end
|
|
89
89
|
|
90
90
|
# replaces all key and fill classes with similar ones, without opacity
|
91
91
|
# this allows rendering of svg and png on rox and gqview without problems
|
92
|
-
module SVG
|
93
|
-
module Graph
|
92
|
+
module SVG #:nodoc:
|
93
|
+
module Graph
|
94
94
|
class BarNoOp < Bar
|
95
95
|
def get_css; SVG::Graph.get_css_standard; end
|
96
96
|
end
|
data/lib/statsample/mle.rb
CHANGED
@@ -56,8 +56,8 @@ module Statsample
|
|
56
56
|
end
|
57
57
|
# Creates a zero matrix Mx1, with M=x.M
|
58
58
|
def set_default_parameters(x)
|
59
|
-
fd=x.column_size
|
60
|
-
|
59
|
+
fd=[0.0]*x.column_size
|
60
|
+
fd.push(0.1) if self.is_a? Statsample::MLE::Normal
|
61
61
|
parameters = Matrix.columns([fd])
|
62
62
|
end
|
63
63
|
|
@@ -2,13 +2,39 @@
|
|
2
2
|
module Statsample
|
3
3
|
module Regression
|
4
4
|
module Binomial
|
5
|
+
# Create a Logit model object.
|
6
|
+
# ds:: Dataset
|
7
|
+
# y:: Name of dependent vector
|
8
|
+
# Use
|
9
|
+
# dataset=Statsample::CSV.read("data.csv")
|
10
|
+
# y="y"
|
11
|
+
# lr=Statsample::Regression::Binomial.logit(dataset,y)
|
12
|
+
#
|
5
13
|
def self.logit(ds,y_var)
|
6
14
|
Logit.new(ds,y_var)
|
7
15
|
end
|
16
|
+
# Create a Probit model object.
|
17
|
+
# ds:: Dataset
|
18
|
+
# y:: Name of dependent vector
|
19
|
+
# Use
|
20
|
+
# dataset=Statsample::CSV.read("data.csv")
|
21
|
+
# y="y"
|
22
|
+
# lr=Statsample::Regression::Binomial.probit(dataset,y)
|
23
|
+
#
|
24
|
+
|
8
25
|
def self.probit(ds,y_var)
|
9
26
|
Probit.new(ds,y_var)
|
10
27
|
end
|
11
|
-
|
28
|
+
# Base Engine for binomial regression analysis.
|
29
|
+
# See Statsample::Regression::Binomial.logit() and
|
30
|
+
# Statsample::Regression::Binomial.probit for fast
|
31
|
+
# access methods.
|
32
|
+
#
|
33
|
+
# Use:
|
34
|
+
# dataset=Statsample::CSV.read("data.csv")
|
35
|
+
# y="y"
|
36
|
+
# model=Statsample::MLE::Logit.new
|
37
|
+
# lr=Statsample::Regression::Binomial::BaseEngine.new(dataset, y, model)
|
12
38
|
class BaseEngine
|
13
39
|
attr_reader :log_likehood, :iterations
|
14
40
|
def initialize(ds,y_var,model)
|
data/lib/statsample/vector.rb
CHANGED
@@ -6,10 +6,10 @@ end
|
|
6
6
|
|
7
7
|
module Statsample
|
8
8
|
class << self
|
9
|
-
# Create a matrix using vectors as columns
|
9
|
+
# Create a matrix using vectors as columns.
|
10
10
|
# Use:
|
11
11
|
#
|
12
|
-
#
|
12
|
+
# matrix=Statsample.vector_cols_matrix(v1,v2)
|
13
13
|
def vector_cols_matrix(*vs)
|
14
14
|
# test
|
15
15
|
size=vs[0].size
|
@@ -23,7 +23,7 @@ module Statsample
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
# Returns a duplicate of the input vectors, without missing data
|
26
|
-
# for any of the vectors
|
26
|
+
# for any of the vectors.
|
27
27
|
#
|
28
28
|
# a=[1,2,3,6,7,nil,3,5].to_vector(:scale)
|
29
29
|
# b=[nil,nil,5,6,4,5,10,2].to_vector(:scale)
|
@@ -89,8 +89,8 @@ class Vector
|
|
89
89
|
def dup
|
90
90
|
Vector.new(@data.dup,@type,@missing_values.dup,@labels.dup)
|
91
91
|
end
|
92
|
-
# Returns an empty duplicate of the vector. Maintains the type,
|
93
|
-
# values
|
92
|
+
# Returns an empty duplicate of the vector. Maintains the type,
|
93
|
+
# missing values and labels.
|
94
94
|
def dup_empty
|
95
95
|
Vector.new([],@type,@missing_values.dup,@labels.dup)
|
96
96
|
end
|
@@ -123,7 +123,7 @@ class Vector
|
|
123
123
|
|
124
124
|
alias_method :standarized, :vector_standarized
|
125
125
|
|
126
|
-
def box_cox_transformation(lambda)
|
126
|
+
def box_cox_transformation(lambda) # :nodoc:
|
127
127
|
raise "Should be a scale" unless @type==:scale
|
128
128
|
@data_with_nils.collect{|x|
|
129
129
|
if !x.nil?
|
@@ -162,21 +162,20 @@ class Vector
|
|
162
162
|
end
|
163
163
|
# Modifies current vector, with data modified by block.
|
164
164
|
# Equivalent to #collect! on @data
|
165
|
-
|
166
165
|
def recode!
|
167
166
|
@data.collect!{|x|
|
168
167
|
yield x
|
169
168
|
}
|
170
169
|
set_valid_data
|
171
170
|
end
|
172
|
-
# Iterate on each item
|
171
|
+
# Iterate on each item.
|
173
172
|
# Equivalent to
|
174
173
|
# @data.each{|x| yield x}
|
175
174
|
def each
|
176
175
|
@data.each{|x| yield(x) }
|
177
176
|
end
|
178
177
|
|
179
|
-
# Iterate on each
|
178
|
+
# Iterate on each item, retrieving index
|
180
179
|
|
181
180
|
def each_index
|
182
181
|
(0...@data.size).each {|i|
|
@@ -185,16 +184,27 @@ class Vector
|
|
185
184
|
end
|
186
185
|
# Add a value at the end of the vector.
|
187
186
|
# If second argument set to false, you should update the Vector using
|
188
|
-
# Vector
|
187
|
+
# Vector.set_valid_data at the end of your insertion cycle
|
189
188
|
#
|
190
189
|
def add(v,update_valid=true)
|
191
190
|
@data.push(v)
|
192
191
|
set_valid_data if update_valid
|
193
192
|
end
|
194
193
|
# Update valid_data, missing_data, data_with_nils and gsl
|
195
|
-
# at the end of an insertion
|
194
|
+
# at the end of an insertion.
|
196
195
|
#
|
197
|
-
# Use after add(v,false)
|
196
|
+
# Use after Vector.add(v,false)
|
197
|
+
# Usage:
|
198
|
+
# v=Statsample::Vector.new
|
199
|
+
# v.add(2,false)
|
200
|
+
# v.add(4,false)
|
201
|
+
# v.data
|
202
|
+
# => [2,4]
|
203
|
+
# v.valid_data
|
204
|
+
# => []
|
205
|
+
# v.set_valid_data
|
206
|
+
# v.valid_data
|
207
|
+
# => [2,4]
|
198
208
|
def set_valid_data
|
199
209
|
@valid_data.clear
|
200
210
|
@missing_data.clear
|
@@ -203,17 +213,17 @@ class Vector
|
|
203
213
|
set_valid_data_intern
|
204
214
|
set_scale_data if(@type==:scale)
|
205
215
|
end
|
206
|
-
|
216
|
+
|
207
217
|
if Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern)
|
208
|
-
def set_valid_data_intern
|
218
|
+
def set_valid_data_intern #:nodoc:
|
209
219
|
Statsample::STATSAMPLE__.set_valid_data_intern(self)
|
210
220
|
end
|
211
221
|
else
|
212
|
-
def set_valid_data_intern
|
222
|
+
def set_valid_data_intern #:nodoc:
|
213
223
|
_set_valid_data_intern
|
214
224
|
end
|
215
225
|
end
|
216
|
-
def _set_valid_data_intern
|
226
|
+
def _set_valid_data_intern #:nodoc:
|
217
227
|
@data.each do |n|
|
218
228
|
if is_valid? n
|
219
229
|
@valid_data.push(n)
|
@@ -225,15 +235,17 @@ class Vector
|
|
225
235
|
end
|
226
236
|
@has_missing_data=@missing_data.size>0
|
227
237
|
end
|
228
|
-
|
238
|
+
|
229
239
|
# Retrieves true if data has one or more missing values
|
230
240
|
def has_missing_data?
|
231
241
|
@has_missing_data
|
232
242
|
end
|
243
|
+
# Retrieves label for value x. Retrieves x if
|
244
|
+
# no label defined.
|
233
245
|
def labeling(x)
|
234
246
|
@labels.has_key?(x) ? @labels[x].to_s : x.to_s
|
235
247
|
end
|
236
|
-
# Returns a Vector with
|
248
|
+
# Returns a Vector with data with labels replaced by the label.
|
237
249
|
def vector_labeled
|
238
250
|
d=@data.collect{|x|
|
239
251
|
if @labels.has_key? x
|
@@ -273,11 +285,11 @@ class Vector
|
|
273
285
|
@type=t
|
274
286
|
set_scale_data if(t==:scale)
|
275
287
|
end
|
276
|
-
|
277
288
|
def to_a
|
278
289
|
@data.dup
|
279
290
|
end
|
280
291
|
alias_method :to_ary, :to_a
|
292
|
+
|
281
293
|
# Vector sum.
|
282
294
|
# - If v is a scalar, add this value to all elements
|
283
295
|
# - If v is a Array or a Vector, should be of the same size of this vector
|
@@ -296,8 +308,8 @@ class Vector
|
|
296
308
|
def -(v)
|
297
309
|
_vector_ari("-",v)
|
298
310
|
end
|
299
|
-
# Reports all values that doesn't comply with a condition
|
300
|
-
# Returns a hash with the index of data and the invalid data
|
311
|
+
# Reports all values that doesn't comply with a condition.
|
312
|
+
# Returns a hash with the index of data and the invalid data.
|
301
313
|
def verify
|
302
314
|
h={}
|
303
315
|
(0...@data.size).to_a.each{|i|
|
@@ -401,7 +413,7 @@ class Vector
|
|
401
413
|
# only with valid data.
|
402
414
|
#
|
403
415
|
# In all the trials, every item has the same probability
|
404
|
-
# of been selected
|
416
|
+
# of been selected.
|
405
417
|
def sample_with_replacement(sample=1)
|
406
418
|
if(@type!=:scale or !HAS_GSL)
|
407
419
|
vds=@valid_data.size
|
@@ -414,8 +426,9 @@ class Vector
|
|
414
426
|
# Returns an random sample of size n, without replacement,
|
415
427
|
# only with valid data.
|
416
428
|
#
|
417
|
-
# Every element could only be selected once
|
418
|
-
#
|
429
|
+
# Every element could only be selected once.
|
430
|
+
#
|
431
|
+
# A sample of the same size of the vector is the vector itself.
|
419
432
|
|
420
433
|
def sample_without_replacement(sample=1)
|
421
434
|
if(@type!=:scale or !HAS_GSL)
|
@@ -432,6 +445,11 @@ class Vector
|
|
432
445
|
r.choose(@gsl, sample).to_a
|
433
446
|
end
|
434
447
|
end
|
448
|
+
# Retrieves number of cases which comply condition.
|
449
|
+
# If block given, retrieves number of instances where
|
450
|
+
# block returns true.
|
451
|
+
# If other values given, retrieves the frequency for
|
452
|
+
# this value.
|
435
453
|
def count(x=false)
|
436
454
|
if block_given?
|
437
455
|
r=@data.inject(0) {|s, i|
|
@@ -443,7 +461,8 @@ class Vector
|
|
443
461
|
frequencies[x].nil? ? 0 : frequencies[x]
|
444
462
|
end
|
445
463
|
end
|
446
|
-
|
464
|
+
|
465
|
+
# Returns the database type for the vector, according to its content
|
447
466
|
|
448
467
|
def db_type(dbs='mysql')
|
449
468
|
# first, detect any character not number
|
@@ -465,11 +484,12 @@ class Vector
|
|
465
484
|
true
|
466
485
|
end
|
467
486
|
end
|
487
|
+
|
468
488
|
def to_s
|
469
489
|
sprintf("Vector(type:%s, n:%d)[%s]",@type.to_s,@data.size, @data.collect{|d| d.nil? ? "nil":d}.join(","))
|
470
490
|
end
|
471
491
|
# Ugly name. Really, create a Vector for standard 'matrix' package.
|
472
|
-
# <tt>dir</tt> could
|
492
|
+
# <tt>dir</tt> could be :horizontal or :vertical
|
473
493
|
def to_matrix(dir=:horizontal)
|
474
494
|
case dir
|
475
495
|
when :horizontal
|
@@ -481,9 +501,7 @@ class Vector
|
|
481
501
|
def inspect
|
482
502
|
self.to_s
|
483
503
|
end
|
484
|
-
|
485
|
-
@data.dup
|
486
|
-
end
|
504
|
+
# Retrieves uniques values for data.
|
487
505
|
def factors
|
488
506
|
if @type==:scale
|
489
507
|
@scale_data.uniq.sort
|
@@ -492,17 +510,17 @@ class Vector
|
|
492
510
|
end
|
493
511
|
end
|
494
512
|
if Statsample::STATSAMPLE__.respond_to?(:frequencies)
|
495
|
-
# Returns a hash with the distribution of frecuencies
|
513
|
+
# Returns a hash with the distribution of frecuencies for
|
496
514
|
# the sample
|
497
515
|
def frequencies
|
498
516
|
Statsample::STATSAMPLE__.frequencies(@valid_data)
|
499
517
|
end
|
500
518
|
else
|
501
|
-
def frequencies
|
519
|
+
def frequencies #:nodoc:
|
502
520
|
_frequencies
|
503
521
|
end
|
504
522
|
end
|
505
|
-
def _frequencies
|
523
|
+
def _frequencies #:nodoc:
|
506
524
|
@valid_data.inject(Hash.new) {|a,x|
|
507
525
|
a[x]||=0
|
508
526
|
a[x]=a[x]+1
|
@@ -589,7 +607,8 @@ class Vector
|
|
589
607
|
end
|
590
608
|
def proportion_confidence_interval_z(n_poblation,margin=0.95,v=1)
|
591
609
|
Statsample::proportion_confidence_interval_z(proportion(v), @valid_data.size, n_poblation, margin)
|
592
|
-
end
|
610
|
+
end
|
611
|
+
|
593
612
|
self.instance_methods.find_all{|met| met=~/_slow$/}.each{|met|
|
594
613
|
met_or=met.gsub("_slow","")
|
595
614
|
if !self.method_defined?(met_or)
|
@@ -672,8 +691,7 @@ class Vector
|
|
672
691
|
# The arithmetical mean of data
|
673
692
|
def mean
|
674
693
|
check_type :scale
|
675
|
-
|
676
|
-
sum.to_f.quo(n_valid)
|
694
|
+
sum.to_f.quo(n_valid)
|
677
695
|
end
|
678
696
|
# Sum of squares for the data around a value.
|
679
697
|
# By default, this value is the mean
|
data/test/test_combination.rb
CHANGED
@@ -31,8 +31,8 @@ class StatsampleCombinationTestCase < Test::Unit::TestCase
|
|
31
31
|
rb.each{|y|
|
32
32
|
rb_array.push(y)
|
33
33
|
}
|
34
|
-
assert(gsl.d.is_a?
|
35
|
-
assert(rb.d.is_a?
|
34
|
+
assert(gsl.d.is_a?(Statsample::Combination::CombinationGsl))
|
35
|
+
assert(rb.d.is_a?(Statsample::Combination::CombinationRuby))
|
36
36
|
|
37
37
|
assert_equal(rb_array,gsl_array)
|
38
38
|
else
|
data/test/test_distribution.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: statsample
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Claudio Bustos
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-09-
|
12
|
+
date: 2009-09-12 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|