statsample 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +2 -1
- data/History.txt +11 -0
- data/Manifest.txt +2 -3
- data/README.txt +0 -17
- data/Rakefile +10 -9
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/examples/principal_axis.rb +2 -0
- data/examples/u_test.rb +8 -0
- data/lib/distribution.rb +1 -1
- data/lib/statsample.rb +12 -12
- data/lib/statsample/anova/oneway.rb +4 -4
- data/lib/statsample/bivariate.rb +10 -3
- data/lib/statsample/bivariate/pearson.rb +55 -0
- data/lib/statsample/dataset.rb +57 -49
- data/lib/statsample/dominanceanalysis.rb +1 -2
- data/lib/statsample/dominanceanalysis/bootstrap.rb +46 -54
- data/lib/statsample/factor.rb +0 -1
- data/lib/statsample/factor/parallelanalysis.rb +9 -13
- data/lib/statsample/factor/pca.rb +5 -10
- data/lib/statsample/factor/principalaxis.rb +27 -33
- data/lib/statsample/matrix.rb +11 -11
- data/lib/statsample/mle.rb +0 -1
- data/lib/statsample/regression.rb +0 -1
- data/lib/statsample/reliability.rb +2 -2
- data/lib/statsample/reliability/multiscaleanalysis.rb +62 -15
- data/lib/statsample/reliability/scaleanalysis.rb +5 -6
- data/lib/statsample/test/f.rb +2 -5
- data/lib/statsample/test/levene.rb +2 -5
- data/lib/statsample/test/t.rb +4 -13
- data/lib/statsample/test/umannwhitney.rb +19 -19
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +304 -111
- data/po/statsample.pot +224 -90
- data/test/test_bivariate.rb +8 -69
- data/test/test_reliability.rb +3 -4
- metadata +30 -18
- metadata.gz.sig +0 -0
- data/lib/statsample/bivariate/polychoric.rb +0 -893
- data/lib/statsample/bivariate/tetrachoric.rb +0 -457
- data/test/test_bivariate_polychoric.rb +0 -70
data.tar.gz.sig
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
|
1
|
+
�2�������Ab���Է����;P�
|
2
|
+
��m��Iƚ��Xr�(ηV����:Pd�Y�����L���ϡ���-R���'��;�2l�n'40XH���!�(�l=�,�gX�|�N���{L�--�sWtr/b��^L�-tB?�I%�H�o�fk#HI��uc�V��c�0 ��T���x��)����v�(�f<0$�Zev��S��^�t*F̞�@�U�2d���8���(v��JUs�Q6�Ǐ/�#���S�;
|
data/History.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
=== 0.13.0 / 2010-06-13
|
2
|
+
|
3
|
+
* Polychoric and Tetrachoric moved to gem statsample-bivariate-extension
|
4
|
+
* All classes left with a summary method now include Summarizable. Every method which returns a localizable string is now parsed with _()
|
5
|
+
* Correct implementation of Reliability::MultiScaleAnalysis.
|
6
|
+
* Spanish translation for Mann-Whitney's U
|
7
|
+
* Added example for Mann-Whitney's U test
|
8
|
+
* Better summary for Mann-Whitney's U Test
|
9
|
+
* Added Statsample::Bivariate::Pearson class to retrieve complete analysis for r correlations
|
10
|
+
* Bug fix on DominanceAnalysis::Bootstrap
|
11
|
+
|
1
12
|
=== 0.12.0 / 2010-06-09
|
2
13
|
|
3
14
|
* Modified Rakefile to remove dependencies based on C extensions. These are moved to statsample-optimization
|
data/Manifest.txt
CHANGED
@@ -25,6 +25,7 @@ examples/principal_axis.rb
|
|
25
25
|
examples/reliability.rb
|
26
26
|
examples/t_test.rb
|
27
27
|
examples/tetrachoric.rb
|
28
|
+
examples/u_test.rb
|
28
29
|
examples/vector.rb
|
29
30
|
lib/distribution.rb
|
30
31
|
lib/distribution/chisquare.rb
|
@@ -39,8 +40,7 @@ lib/statsample/anova.rb
|
|
39
40
|
lib/statsample/anova/oneway.rb
|
40
41
|
lib/statsample/anova/twoway.rb
|
41
42
|
lib/statsample/bivariate.rb
|
42
|
-
lib/statsample/bivariate/
|
43
|
-
lib/statsample/bivariate/tetrachoric.rb
|
43
|
+
lib/statsample/bivariate/pearson.rb
|
44
44
|
lib/statsample/codification.rb
|
45
45
|
lib/statsample/combination.rb
|
46
46
|
lib/statsample/converter/csv.rb
|
@@ -101,7 +101,6 @@ test/test_anovatwoway.rb
|
|
101
101
|
test/test_anovatwowaywithdataset.rb
|
102
102
|
test/test_anovawithvectors.rb
|
103
103
|
test/test_bivariate.rb
|
104
|
-
test/test_bivariate_polychoric.rb
|
105
104
|
test/test_codification.rb
|
106
105
|
test/test_combination.rb
|
107
106
|
test/test_crosstab.rb
|
data/README.txt
CHANGED
@@ -76,23 +76,6 @@ Include:
|
|
76
76
|
cm=Statsample::Bivariate.correlation_matrix(ds)
|
77
77
|
puts cm.summary
|
78
78
|
|
79
|
-
=== Tetrachoric correlation
|
80
|
-
|
81
|
-
require 'statsample'
|
82
|
-
a=40
|
83
|
-
b=10
|
84
|
-
c=20
|
85
|
-
d=30
|
86
|
-
tetra=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
87
|
-
puts tetra.summary
|
88
|
-
|
89
|
-
=== Polychoric correlation
|
90
|
-
|
91
|
-
require 'statsample'
|
92
|
-
ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
93
|
-
|
94
|
-
poly=Statsample::Bivariate::Polychoric.new(ct)
|
95
|
-
puts poly.summary
|
96
79
|
|
97
80
|
== REQUIREMENTS:
|
98
81
|
|
data/Rakefile
CHANGED
@@ -23,13 +23,13 @@ task :release do
|
|
23
23
|
system %{git push origin master}
|
24
24
|
end
|
25
25
|
desc "Update pot/po files."
|
26
|
-
task :updatepo do
|
26
|
+
task "gettext:updatepo" do
|
27
27
|
require 'gettext/tools'
|
28
28
|
GetText.update_pofiles("statsample", Dir.glob("{lib,bin}/**/*.{rb,rhtml}"), "statsample #{Statsample::VERSION}")
|
29
29
|
end
|
30
30
|
|
31
31
|
desc "Create mo-files"
|
32
|
-
task :makemo do
|
32
|
+
task "gettext:makemo" do
|
33
33
|
require 'gettext/tools'
|
34
34
|
GetText.create_mofiles()
|
35
35
|
# GetText.create_mofiles(true, "po", "locale") # This is for "Ruby on Rails".
|
@@ -40,7 +40,8 @@ h=Hoe.spec('statsample') do
|
|
40
40
|
#self.testlib=:minitest
|
41
41
|
self.rubyforge_name = "ruby-statsample"
|
42
42
|
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
43
|
-
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.2.0"]
|
43
|
+
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.2.0"] << ["statsample-bivariate-extension", "~>0.13.0"]
|
44
|
+
|
44
45
|
self.extra_dev_deps << ["shoulda"]
|
45
46
|
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
46
47
|
self.post_install_message = <<-EOF
|
@@ -51,13 +52,13 @@ On *nix, you should install statsample-optimization
|
|
51
52
|
to retrieve gems gsl, statistics2 and a C extension
|
52
53
|
to speed some methods.
|
53
54
|
|
54
|
-
$sudo gem install statsample-optimization
|
55
|
+
$ sudo gem install statsample-optimization
|
55
56
|
|
56
|
-
|
57
|
-
|
58
|
-
|
57
|
+
On Ubuntu, install build-essential and libgsl0-dev
|
58
|
+
using apt-get and compile ruby 1.8 or 1.9 from
|
59
|
+
source code first.
|
59
60
|
|
60
|
-
$sudo apt-get install build-essential libgsl0-dev
|
61
|
+
$ sudo apt-get install build-essential libgsl0-dev
|
61
62
|
|
62
63
|
|
63
64
|
*****************************************************
|
@@ -90,7 +91,7 @@ Rake::RDocTask.new(:docs) do |rd|
|
|
90
91
|
end
|
91
92
|
|
92
93
|
|
93
|
-
desc '
|
94
|
+
desc 'Publish rdocs with analytics support'
|
94
95
|
task :publicar_docs => [:clean, :docs] do
|
95
96
|
ruby %{agregar_adsense_a_doc.rb}
|
96
97
|
path = File.expand_path("~/.rubyforge/user-config.yml")
|
Binary file
|
data/examples/principal_axis.rb
CHANGED
@@ -4,5 +4,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
4
4
|
require 'statsample'
|
5
5
|
matrix=Matrix[
|
6
6
|
[1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807], [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
|
7
|
+
matrix.extend Statsample::CovariateMatrix
|
8
|
+
#matrix.fields=%w{a b c d}
|
7
9
|
fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1,:smc=>false)
|
8
10
|
puts fa.summary
|
data/examples/u_test.rb
ADDED
data/lib/distribution.rb
CHANGED
data/lib/statsample.rb
CHANGED
@@ -31,9 +31,9 @@ end
|
|
31
31
|
class String
|
32
32
|
def is_number?
|
33
33
|
if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
|
34
|
-
|
34
|
+
true
|
35
35
|
else
|
36
|
-
|
36
|
+
false
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
@@ -112,7 +112,7 @@ module Statsample
|
|
112
112
|
false
|
113
113
|
end
|
114
114
|
end
|
115
|
-
VERSION = '0.
|
115
|
+
VERSION = '0.13.0'
|
116
116
|
SPLIT_TOKEN = ","
|
117
117
|
autoload(:Database, 'statsample/converters')
|
118
118
|
autoload(:Anova, 'statsample/anova')
|
@@ -201,19 +201,19 @@ module Statsample
|
|
201
201
|
|
202
202
|
|
203
203
|
|
204
|
-
|
204
|
+
module Util
|
205
205
|
# Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
|
206
206
|
def normal_order_statistic_medians(i,n)
|
207
207
|
if i==1
|
208
|
-
|
208
|
+
u= 1.0 - normal_order_statistic_medians(n,n)
|
209
209
|
elsif i==n
|
210
|
-
|
210
|
+
u=0.5**(1 / n.to_f)
|
211
211
|
else
|
212
|
-
|
212
|
+
u= (i - 0.3175) / (n + 0.365)
|
213
213
|
end
|
214
214
|
u
|
215
215
|
end
|
216
|
-
|
216
|
+
end
|
217
217
|
|
218
218
|
|
219
219
|
|
@@ -224,7 +224,7 @@ module Statsample
|
|
224
224
|
fp.close
|
225
225
|
end
|
226
226
|
end
|
227
|
-
# Provides
|
227
|
+
# Provides method summary to generate summaries and include GetText
|
228
228
|
module Summarizable
|
229
229
|
include GetText
|
230
230
|
bindtextdomain("statsample")
|
@@ -242,12 +242,12 @@ end
|
|
242
242
|
begin
|
243
243
|
require 'statsamplert'
|
244
244
|
rescue LoadError
|
245
|
-
module Statsample
|
246
|
-
|
245
|
+
module Statsample
|
246
|
+
OPTIMIZED=false
|
247
247
|
end
|
248
248
|
end
|
249
249
|
|
250
250
|
require 'statsample/vector'
|
251
251
|
require 'statsample/dataset'
|
252
252
|
require 'statsample/crosstab'
|
253
|
-
require 'statsample/matrix'
|
253
|
+
require 'statsample/matrix'
|
@@ -32,8 +32,8 @@ module Statsample
|
|
32
32
|
@ss_total=@ss_num+@ss_den
|
33
33
|
@ms_total=@ms_num+@ms_den
|
34
34
|
opts_default={:name=>"ANOVA",
|
35
|
-
:name_denominator=>"Explained variance",
|
36
|
-
:name_numerator=>"Unexplained variance"}
|
35
|
+
:name_denominator=>_("Explained variance"),
|
36
|
+
:name_numerator=>_("Unexplained variance")}
|
37
37
|
@opts=opts_default.merge(opts)
|
38
38
|
opts_default.keys.each {|k|
|
39
39
|
send("#{k}=", @opts[k])
|
@@ -89,8 +89,8 @@ module Statsample
|
|
89
89
|
end
|
90
90
|
opts||=Hash.new
|
91
91
|
opts_default={:name=>_("Anova One-Way"),
|
92
|
-
:name_numerator=>"Between Groups",
|
93
|
-
:name_denominator=>"Within Groups",
|
92
|
+
:name_numerator=>_("Between Groups"),
|
93
|
+
:name_denominator=>_("Within Groups"),
|
94
94
|
:summary_descriptives=>false,
|
95
95
|
:summary_levene=>false}
|
96
96
|
@opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
|
data/lib/statsample/bivariate.rb
CHANGED
@@ -1,8 +1,15 @@
|
|
1
|
-
require 'statsample/bivariate/
|
2
|
-
|
1
|
+
require 'statsample/bivariate/pearson'
|
2
|
+
|
3
3
|
module Statsample
|
4
|
-
# Diverse
|
4
|
+
# Diverse methods and classes to calculate bivariate relations
|
5
|
+
# Specific classes:
|
6
|
+
# * Statsample::Bivariate::Pearson : Pearson correlation coefficient (r)
|
7
|
+
# * Statsample::Bivariate::Tetrachoric : Tetrachoric correlation
|
8
|
+
# * Statsample::Bivariate::Polychoric : Polychoric correlation (using joint, two-step and polychoric series)
|
5
9
|
module Bivariate
|
10
|
+
autoload(:Polychoric, "statsample/bivariate/polychoric")
|
11
|
+
autoload(:Tetrachoric, "statsample/bivariate/tetrachoric")
|
12
|
+
|
6
13
|
class << self
|
7
14
|
# Covariance between two vectors
|
8
15
|
def covariance(v1,v2)
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Bivariate
|
3
|
+
# = Pearson correlation coefficient (r)
|
4
|
+
#
|
5
|
+
# The moment-product Pearson's correlation coefficient, known as 'r'
|
6
|
+
# is a measure of bivariate association between two continuous
|
7
|
+
# variables.
|
8
|
+
#
|
9
|
+
# == Usage
|
10
|
+
# a = [1,2,3,4,5,6].to_scale
|
11
|
+
# b = [2,3,4,5,6,7].to_scale
|
12
|
+
# pearson = Statsample::Bivariate::Pearson.new(a,b)
|
13
|
+
# puts pearson.r
|
14
|
+
# puts pearson.t
|
15
|
+
# puts pearson.probability
|
16
|
+
#
|
17
|
+
# puts pearson.summary
|
18
|
+
#
|
19
|
+
class Pearson
|
20
|
+
|
21
|
+
include Statsample::Test
|
22
|
+
include Summarizable
|
23
|
+
# Name of correlation
|
24
|
+
attr_accessor :name
|
25
|
+
# Tails for probability (:both, :left or :right)
|
26
|
+
attr_accessor :tails
|
27
|
+
attr_accessor :n
|
28
|
+
def initialize(v1,v2,opts=Hash.new)
|
29
|
+
@v1_name,@v2_name = v1.name,v2.name
|
30
|
+
@v1,@v2 = Statsample.only_valid_clone(v1,v2)
|
31
|
+
@n=@v1.size
|
32
|
+
opts_default={
|
33
|
+
:name=>_("Correlation (%s - %s)") % [@v1_name, @v2_name],
|
34
|
+
:tails=>:both
|
35
|
+
}
|
36
|
+
@opts=opts.merge(opts_default)
|
37
|
+
@opts.each{|k,v|
|
38
|
+
self.send("#{k}=",v) if self.respond_to? k
|
39
|
+
}
|
40
|
+
end
|
41
|
+
def r
|
42
|
+
Statsample::Bivariate.pearson(@v1,@v2)
|
43
|
+
end
|
44
|
+
def t
|
45
|
+
Statsample::Bivariate.t_pearson(@v1,@v2)
|
46
|
+
end
|
47
|
+
def probability
|
48
|
+
p_using_cdf(Distribution::T.cdf(t, @v1.size-2), tails)
|
49
|
+
end
|
50
|
+
def report_building(builder)
|
51
|
+
builder.text(_("%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s tails)") % [@name, r,t, (n-2), probability, tails])
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/statsample/dataset.rb
CHANGED
@@ -39,13 +39,15 @@ module Statsample
|
|
39
39
|
#
|
40
40
|
#
|
41
41
|
# ==Usage
|
42
|
-
# Create a empty dataset
|
42
|
+
# Create an empty dataset:
|
43
43
|
# Dataset.new()
|
44
|
-
# Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt
|
44
|
+
# Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt>:
|
45
45
|
# Dataset.new(%w{v1 v2 v3})
|
46
|
-
# Create a dataset with two vectors
|
46
|
+
# Create a dataset with two vectors, called <tt>v1</tt>
|
47
|
+
# and <tt>v2</tt>:
|
47
48
|
# Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
|
48
|
-
# Create a dataset with two given vectors (v1 and v2),
|
49
|
+
# Create a dataset with two given vectors (v1 and v2),
|
50
|
+
# with vectors on inverted order:
|
49
51
|
# Dataset.new({'v2'=>v2,'v1'=>v1},['v2','v1'])
|
50
52
|
#
|
51
53
|
# The fast way to create a dataset uses Hash#to_dataset, with
|
@@ -59,7 +61,7 @@ module Statsample
|
|
59
61
|
include Summarizable
|
60
62
|
# Hash of Statsample::Vector
|
61
63
|
attr_reader :vectors
|
62
|
-
# Ordered
|
64
|
+
# Ordered ids of vectors
|
63
65
|
attr_reader :fields
|
64
66
|
# Name of dataset
|
65
67
|
attr_accessor:name
|
@@ -67,7 +69,7 @@ module Statsample
|
|
67
69
|
attr_reader :cases
|
68
70
|
# Location of pointer on enumerations methods (like #each)
|
69
71
|
attr_reader :i
|
70
|
-
|
72
|
+
|
71
73
|
# Generates a new dataset, using three vectors
|
72
74
|
# - Rows
|
73
75
|
# - Columns
|
@@ -87,7 +89,8 @@ module Statsample
|
|
87
89
|
# b 1 0
|
88
90
|
#
|
89
91
|
# Useful to process outputs from databases
|
90
|
-
#
|
92
|
+
#
|
93
|
+
|
91
94
|
def self.crosstab_by_asignation(rows,columns,values)
|
92
95
|
raise "Three vectors should be equal size" if rows.size!=columns.size or rows.size!=values.size
|
93
96
|
cols_values=columns.factors
|
@@ -123,7 +126,6 @@ module Statsample
|
|
123
126
|
# [fields] Array of names for vectors. Is only used for set the
|
124
127
|
# order of variables. If empty, vector keys in alphabetical order are
|
125
128
|
# used as fields
|
126
|
-
|
127
129
|
#
|
128
130
|
def initialize(vectors={}, fields=[])
|
129
131
|
@@n_dataset||=0
|
@@ -141,6 +143,10 @@ module Statsample
|
|
141
143
|
end
|
142
144
|
@i=nil
|
143
145
|
end
|
146
|
+
|
147
|
+
#
|
148
|
+
# Returns a GSL::matrix
|
149
|
+
#
|
144
150
|
def to_gsl_matrix
|
145
151
|
matrix=GSL::Matrix.alloc(cases,@vectors.size)
|
146
152
|
each_array do |row|
|
@@ -171,7 +177,7 @@ module Statsample
|
|
171
177
|
end
|
172
178
|
# Returns a duplicate of the Database
|
173
179
|
# If fields given, only include those vectors.
|
174
|
-
# Every vector will be dup
|
180
|
+
# Every vector will be dup.
|
175
181
|
def dup(*fields_to_include)
|
176
182
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
177
183
|
fields_to_include=fields_to_include[0]
|
@@ -186,6 +192,10 @@ module Statsample
|
|
186
192
|
}
|
187
193
|
Dataset.new(vectors,fields)
|
188
194
|
end
|
195
|
+
# Returns (when possible) a cheap copy of dataset.
|
196
|
+
# If no vector has missing values, returns original vectors.
|
197
|
+
# If missing values are present, uses Dataset.dup_only_valid
|
198
|
+
#
|
189
199
|
def clone_only_valid(*fields_to_include)
|
190
200
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
191
201
|
fields_to_include=fields_to_include[0]
|
@@ -240,7 +250,7 @@ module Statsample
|
|
240
250
|
ds_new.update_valid_data
|
241
251
|
ds_new
|
242
252
|
end
|
243
|
-
|
253
|
+
# Returns a dataset with standardized data
|
244
254
|
def standarize
|
245
255
|
ds=dup()
|
246
256
|
ds.fields.each do |f|
|
@@ -261,15 +271,18 @@ module Statsample
|
|
261
271
|
def ==(d2)
|
262
272
|
@vectors==d2.vectors and @fields==d2.fields
|
263
273
|
end
|
274
|
+
# Returns vector <tt>c</tt>
|
264
275
|
def col(c)
|
265
276
|
@vectors[c]
|
266
277
|
end
|
267
278
|
alias_method :vector, :col
|
268
|
-
|
279
|
+
# Equal to Dataset[<tt>name</tt>]=<tt>vector</tt>
|
280
|
+
def add_vector(name, vector)
|
269
281
|
raise ArgumentError, "Vector have different size" if vector.size!=@cases
|
270
282
|
@vectors[name]=vector
|
271
283
|
check_order
|
272
284
|
end
|
285
|
+
# Returns true if dataset has vector <tt>v</tt>
|
273
286
|
def has_vector? (v)
|
274
287
|
return @vectors.has_key?(v)
|
275
288
|
end
|
@@ -295,8 +308,8 @@ module Statsample
|
|
295
308
|
# * Hash: keys equal to fields
|
296
309
|
# If uvd is false, #update_valid_data is not executed after
|
297
310
|
# inserting a case. This is very useful if you want to increase the
|
298
|
-
# performance on inserting many cases,
|
299
|
-
#
|
311
|
+
# performance on inserting many cases, because #update_valid_data
|
312
|
+
# performs check on vectors and on the dataset
|
300
313
|
|
301
314
|
def add_case(v,uvd=true)
|
302
315
|
case v
|
@@ -323,7 +336,7 @@ module Statsample
|
|
323
336
|
@fields.each{|f| @vectors[f].set_valid_data}
|
324
337
|
check_length
|
325
338
|
end
|
326
|
-
# Delete
|
339
|
+
# Delete vector named <tt>name</tt>.
|
327
340
|
def delete_vector(name)
|
328
341
|
@fields.delete(name)
|
329
342
|
@vectors.delete(name)
|
@@ -345,26 +358,27 @@ module Statsample
|
|
345
358
|
add_vector(name+join+k,v)
|
346
359
|
}
|
347
360
|
end
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
361
|
+
|
362
|
+
def vector_by_calculation(type=:scale)
|
363
|
+
a=[]
|
364
|
+
each do |row|
|
365
|
+
a.push(yield(row))
|
366
|
+
end
|
367
|
+
a.to_vector(type)
|
368
|
+
end
|
369
|
+
# Returns a vector with the sum of fields
|
370
|
+
# if fields parameter is empty, sum all fields
|
371
|
+
def vector_sum(fields=nil)
|
372
|
+
a=[]
|
373
|
+
fields||=@fields
|
374
|
+
collect_with_index do |row, i|
|
375
|
+
if(fields.find{|f| !@vectors[f].data_with_nils[i]})
|
376
|
+
nil
|
377
|
+
else
|
378
|
+
fields.inject(0) {|ac,v| ac + row[v].to_f}
|
379
|
+
end
|
380
|
+
end
|
381
|
+
end
|
368
382
|
# Check if #fields attribute is correct, after inserting or deleting vectors
|
369
383
|
def check_fields(fields)
|
370
384
|
fields||=@fields
|
@@ -614,14 +628,14 @@ module Statsample
|
|
614
628
|
ds
|
615
629
|
end
|
616
630
|
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
631
|
+
# Creates a new vector with the data of a given field for the cases on which the block returns true
|
632
|
+
def filter_field(field)
|
633
|
+
a=[]
|
634
|
+
each do |c|
|
635
|
+
a.push(c[field]) if yield c
|
636
|
+
end
|
637
|
+
a.to_vector(@vectors[field].type)
|
638
|
+
end
|
625
639
|
|
626
640
|
def to_multiset_by_split_one_field(field)
|
627
641
|
raise ArgumentError,"Should use a correct field name" if !@fields.include? field
|
@@ -802,8 +816,7 @@ module Statsample
|
|
802
816
|
ds.update_valid_data
|
803
817
|
ds
|
804
818
|
end
|
805
|
-
|
806
|
-
def report_building(b)
|
819
|
+
def report_building(b)
|
807
820
|
b.section(:name=>@name) do |g|
|
808
821
|
g.text _"Cases: %d" % cases
|
809
822
|
@fields.each do |f|
|
@@ -811,11 +824,6 @@ module Statsample
|
|
811
824
|
g.parse_element(@vectors[f])
|
812
825
|
end
|
813
826
|
end
|
814
|
-
end
|
815
|
-
def as_r
|
816
|
-
require 'rsruby/dataframe'
|
817
|
-
r=RSRuby.instance
|
818
|
-
|
819
827
|
end
|
820
828
|
end
|
821
829
|
end
|