statsample 0.12.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +2 -1
- data/History.txt +11 -0
- data/Manifest.txt +2 -3
- data/README.txt +0 -17
- data/Rakefile +10 -9
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/examples/principal_axis.rb +2 -0
- data/examples/u_test.rb +8 -0
- data/lib/distribution.rb +1 -1
- data/lib/statsample.rb +12 -12
- data/lib/statsample/anova/oneway.rb +4 -4
- data/lib/statsample/bivariate.rb +10 -3
- data/lib/statsample/bivariate/pearson.rb +55 -0
- data/lib/statsample/dataset.rb +57 -49
- data/lib/statsample/dominanceanalysis.rb +1 -2
- data/lib/statsample/dominanceanalysis/bootstrap.rb +46 -54
- data/lib/statsample/factor.rb +0 -1
- data/lib/statsample/factor/parallelanalysis.rb +9 -13
- data/lib/statsample/factor/pca.rb +5 -10
- data/lib/statsample/factor/principalaxis.rb +27 -33
- data/lib/statsample/matrix.rb +11 -11
- data/lib/statsample/mle.rb +0 -1
- data/lib/statsample/regression.rb +0 -1
- data/lib/statsample/reliability.rb +2 -2
- data/lib/statsample/reliability/multiscaleanalysis.rb +62 -15
- data/lib/statsample/reliability/scaleanalysis.rb +5 -6
- data/lib/statsample/test/f.rb +2 -5
- data/lib/statsample/test/levene.rb +2 -5
- data/lib/statsample/test/t.rb +4 -13
- data/lib/statsample/test/umannwhitney.rb +19 -19
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +304 -111
- data/po/statsample.pot +224 -90
- data/test/test_bivariate.rb +8 -69
- data/test/test_reliability.rb +3 -4
- metadata +30 -18
- metadata.gz.sig +0 -0
- data/lib/statsample/bivariate/polychoric.rb +0 -893
- data/lib/statsample/bivariate/tetrachoric.rb +0 -457
- data/test/test_bivariate_polychoric.rb +0 -70
data.tar.gz.sig
CHANGED
@@ -1 +1,2 @@
|
|
1
|
-
|
1
|
+
�2�������Ab���Է����;P�
|
2
|
+
��m��Iƚ��Xr�(ηV����:Pd�Y�����L���ϡ���-R���'��;�2l�n'40XH���!�(�l=�,�gX�|�N���{L�--�sWtr/b��^L�-tB?�I%�H�o�fk#HI��uc�V��c�0 ��T���x��)����v�(�f<0$�Zev��S��^�t*F̞�@�U�2d���8���(v��JUs�Q6�Ǐ/�#���S�;
|
data/History.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
=== 0.13.0 / 2010-06-13
|
2
|
+
|
3
|
+
* Polychoric and Tetrachoric moved to gem statsample-bivariate-extension
|
4
|
+
* All remaining classes with a summary method now include Summarizable. Every method which returns a localizable string now passes it through _()
|
5
|
+
* Correct implementation of Reliability::MultiScaleAnalysis.
|
6
|
+
* Spanish translation for Mann-Whitney's U
|
7
|
+
* Added example for Mann-Whitney's U test
|
8
|
+
* Better summary for Mann-Whitney's U Test
|
9
|
+
* Added Statsample::Bivariate::Pearson class to retrieve complete analysis for r correlations
|
10
|
+
* Bug fix on DominanceAnalysis::Bootstrap
|
11
|
+
|
1
12
|
=== 0.12.0 / 2010-06-09
|
2
13
|
|
3
14
|
* Modified Rakefile to remove dependencies based on C extensions. These are moved to statsample-optimization
|
data/Manifest.txt
CHANGED
@@ -25,6 +25,7 @@ examples/principal_axis.rb
|
|
25
25
|
examples/reliability.rb
|
26
26
|
examples/t_test.rb
|
27
27
|
examples/tetrachoric.rb
|
28
|
+
examples/u_test.rb
|
28
29
|
examples/vector.rb
|
29
30
|
lib/distribution.rb
|
30
31
|
lib/distribution/chisquare.rb
|
@@ -39,8 +40,7 @@ lib/statsample/anova.rb
|
|
39
40
|
lib/statsample/anova/oneway.rb
|
40
41
|
lib/statsample/anova/twoway.rb
|
41
42
|
lib/statsample/bivariate.rb
|
42
|
-
lib/statsample/bivariate/
|
43
|
-
lib/statsample/bivariate/tetrachoric.rb
|
43
|
+
lib/statsample/bivariate/pearson.rb
|
44
44
|
lib/statsample/codification.rb
|
45
45
|
lib/statsample/combination.rb
|
46
46
|
lib/statsample/converter/csv.rb
|
@@ -101,7 +101,6 @@ test/test_anovatwoway.rb
|
|
101
101
|
test/test_anovatwowaywithdataset.rb
|
102
102
|
test/test_anovawithvectors.rb
|
103
103
|
test/test_bivariate.rb
|
104
|
-
test/test_bivariate_polychoric.rb
|
105
104
|
test/test_codification.rb
|
106
105
|
test/test_combination.rb
|
107
106
|
test/test_crosstab.rb
|
data/README.txt
CHANGED
@@ -76,23 +76,6 @@ Include:
|
|
76
76
|
cm=Statsample::Bivariate.correlation_matrix(ds)
|
77
77
|
puts cm.summary
|
78
78
|
|
79
|
-
=== Tetrachoric correlation
|
80
|
-
|
81
|
-
require 'statsample'
|
82
|
-
a=40
|
83
|
-
b=10
|
84
|
-
c=20
|
85
|
-
d=30
|
86
|
-
tetra=Statsample::Bivariate::Tetrachoric.new(a,b,c,d)
|
87
|
-
puts tetra.summary
|
88
|
-
|
89
|
-
=== Polychoric correlation
|
90
|
-
|
91
|
-
require 'statsample'
|
92
|
-
ct=Matrix[[58,52,1],[26,58,3],[8,12,9]]
|
93
|
-
|
94
|
-
poly=Statsample::Bivariate::Polychoric.new(ct)
|
95
|
-
puts poly.summary
|
96
79
|
|
97
80
|
== REQUIREMENTS:
|
98
81
|
|
data/Rakefile
CHANGED
@@ -23,13 +23,13 @@ task :release do
|
|
23
23
|
system %{git push origin master}
|
24
24
|
end
|
25
25
|
desc "Update pot/po files."
|
26
|
-
task :updatepo do
|
26
|
+
task "gettext:updatepo" do
|
27
27
|
require 'gettext/tools'
|
28
28
|
GetText.update_pofiles("statsample", Dir.glob("{lib,bin}/**/*.{rb,rhtml}"), "statsample #{Statsample::VERSION}")
|
29
29
|
end
|
30
30
|
|
31
31
|
desc "Create mo-files"
|
32
|
-
task :makemo do
|
32
|
+
task "gettext:makemo" do
|
33
33
|
require 'gettext/tools'
|
34
34
|
GetText.create_mofiles()
|
35
35
|
# GetText.create_mofiles(true, "po", "locale") # This is for "Ruby on Rails".
|
@@ -40,7 +40,8 @@ h=Hoe.spec('statsample') do
|
|
40
40
|
#self.testlib=:minitest
|
41
41
|
self.rubyforge_name = "ruby-statsample"
|
42
42
|
self.developer('Claudio Bustos', 'clbustos@gmail.com')
|
43
|
-
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.2.0"]
|
43
|
+
self.extra_deps << ["spreadsheet","~>0.6.0"] << ["svg-graph", "~>1.0"] << ["reportbuilder", "~>1.0"] << ["minimization", "~>0.2.0"] << ["fastercsv"] << ["dirty-memoize", "~>0.0"] << ["extendmatrix","~>0.2.0"] << ["statsample-bivariate-extension", "~>0.13.0"]
|
44
|
+
|
44
45
|
self.extra_dev_deps << ["shoulda"]
|
45
46
|
self.clean_globs << "test/images/*" << "demo/item_analysis/*" << "demo/Regression"
|
46
47
|
self.post_install_message = <<-EOF
|
@@ -51,13 +52,13 @@ On *nix, you should install statsample-optimization
|
|
51
52
|
to retrieve gems gsl, statistics2 and a C extension
|
52
53
|
to speed some methods.
|
53
54
|
|
54
|
-
$sudo gem install statsample-optimization
|
55
|
+
$ sudo gem install statsample-optimization
|
55
56
|
|
56
|
-
|
57
|
-
|
58
|
-
|
57
|
+
On Ubuntu, install build-essential and libgsl0-dev
|
58
|
+
using apt-get and compile ruby 1.8 or 1.9 from
|
59
|
+
source code first.
|
59
60
|
|
60
|
-
$sudo apt-get install build-essential libgsl0-dev
|
61
|
+
$ sudo apt-get install build-essential libgsl0-dev
|
61
62
|
|
62
63
|
|
63
64
|
*****************************************************
|
@@ -90,7 +91,7 @@ Rake::RDocTask.new(:docs) do |rd|
|
|
90
91
|
end
|
91
92
|
|
92
93
|
|
93
|
-
desc '
|
94
|
+
desc 'Publish rdocs with analytics support'
|
94
95
|
task :publicar_docs => [:clean, :docs] do
|
95
96
|
ruby %{agregar_adsense_a_doc.rb}
|
96
97
|
path = File.expand_path("~/.rubyforge/user-config.yml")
|
Binary file
|
data/examples/principal_axis.rb
CHANGED
@@ -4,5 +4,7 @@ $:.unshift(File.dirname(__FILE__)+'/../lib/')
|
|
4
4
|
require 'statsample'
|
5
5
|
matrix=Matrix[
|
6
6
|
[1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807], [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
|
7
|
+
matrix.extend Statsample::CovariateMatrix
|
8
|
+
#matrix.fields=%w{a b c d}
|
7
9
|
fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1,:smc=>false)
|
8
10
|
puts fa.summary
|
data/examples/u_test.rb
ADDED
data/lib/distribution.rb
CHANGED
data/lib/statsample.rb
CHANGED
@@ -31,9 +31,9 @@ end
|
|
31
31
|
class String
|
32
32
|
def is_number?
|
33
33
|
if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
|
34
|
-
|
34
|
+
true
|
35
35
|
else
|
36
|
-
|
36
|
+
false
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
@@ -112,7 +112,7 @@ module Statsample
|
|
112
112
|
false
|
113
113
|
end
|
114
114
|
end
|
115
|
-
VERSION = '0.
|
115
|
+
VERSION = '0.13.0'
|
116
116
|
SPLIT_TOKEN = ","
|
117
117
|
autoload(:Database, 'statsample/converters')
|
118
118
|
autoload(:Anova, 'statsample/anova')
|
@@ -201,19 +201,19 @@ module Statsample
|
|
201
201
|
|
202
202
|
|
203
203
|
|
204
|
-
|
204
|
+
module Util
|
205
205
|
# Reference: http://www.itl.nist.gov/div898/handbook/eda/section3/normprpl.htm
|
206
206
|
def normal_order_statistic_medians(i,n)
|
207
207
|
if i==1
|
208
|
-
|
208
|
+
u= 1.0 - normal_order_statistic_medians(n,n)
|
209
209
|
elsif i==n
|
210
|
-
|
210
|
+
u=0.5**(1 / n.to_f)
|
211
211
|
else
|
212
|
-
|
212
|
+
u= (i - 0.3175) / (n + 0.365)
|
213
213
|
end
|
214
214
|
u
|
215
215
|
end
|
216
|
-
|
216
|
+
end
|
217
217
|
|
218
218
|
|
219
219
|
|
@@ -224,7 +224,7 @@ module Statsample
|
|
224
224
|
fp.close
|
225
225
|
end
|
226
226
|
end
|
227
|
-
# Provides
|
227
|
+
# Provides method summary to generate summaries and include GetText
|
228
228
|
module Summarizable
|
229
229
|
include GetText
|
230
230
|
bindtextdomain("statsample")
|
@@ -242,12 +242,12 @@ end
|
|
242
242
|
begin
|
243
243
|
require 'statsamplert'
|
244
244
|
rescue LoadError
|
245
|
-
module Statsample
|
246
|
-
|
245
|
+
module Statsample
|
246
|
+
OPTIMIZED=false
|
247
247
|
end
|
248
248
|
end
|
249
249
|
|
250
250
|
require 'statsample/vector'
|
251
251
|
require 'statsample/dataset'
|
252
252
|
require 'statsample/crosstab'
|
253
|
-
require 'statsample/matrix'
|
253
|
+
require 'statsample/matrix'
|
@@ -32,8 +32,8 @@ module Statsample
|
|
32
32
|
@ss_total=@ss_num+@ss_den
|
33
33
|
@ms_total=@ms_num+@ms_den
|
34
34
|
opts_default={:name=>"ANOVA",
|
35
|
-
:name_denominator=>"Explained variance",
|
36
|
-
:name_numerator=>"Unexplained variance"}
|
35
|
+
:name_denominator=>_("Explained variance"),
|
36
|
+
:name_numerator=>_("Unexplained variance")}
|
37
37
|
@opts=opts_default.merge(opts)
|
38
38
|
opts_default.keys.each {|k|
|
39
39
|
send("#{k}=", @opts[k])
|
@@ -89,8 +89,8 @@ module Statsample
|
|
89
89
|
end
|
90
90
|
opts||=Hash.new
|
91
91
|
opts_default={:name=>_("Anova One-Way"),
|
92
|
-
:name_numerator=>"Between Groups",
|
93
|
-
:name_denominator=>"Within Groups",
|
92
|
+
:name_numerator=>_("Between Groups"),
|
93
|
+
:name_denominator=>_("Within Groups"),
|
94
94
|
:summary_descriptives=>false,
|
95
95
|
:summary_levene=>false}
|
96
96
|
@opts=opts_default.merge(opts).merge(:ss_num=>ssbg, :ss_den=>sswg, :df_num=>df_bg, :df_den=>df_wg)
|
data/lib/statsample/bivariate.rb
CHANGED
@@ -1,8 +1,15 @@
|
|
1
|
-
require 'statsample/bivariate/
|
2
|
-
|
1
|
+
require 'statsample/bivariate/pearson'
|
2
|
+
|
3
3
|
module Statsample
|
4
|
-
# Diverse
|
4
|
+
# Diverse methods and classes to calculate bivariate relations
|
5
|
+
# Specific classes:
|
6
|
+
# * Statsample::Bivariate::Pearson : Pearson correlation coefficient (r)
|
7
|
+
# * Statsample::Bivariate::Tetrachoric : Tetrachoric correlation
|
8
|
+
# * Statsample::Bivariate::Polychoric : Polychoric correlation (using joint, two-step and polychoric series)
|
5
9
|
module Bivariate
|
10
|
+
autoload(:Polychoric, "statsample/bivariate/polychoric")
|
11
|
+
autoload(:Tetrachoric, "statsample/bivariate/tetrachoric")
|
12
|
+
|
6
13
|
class << self
|
7
14
|
# Covariance between two vectors
|
8
15
|
def covariance(v1,v2)
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Statsample
|
2
|
+
module Bivariate
|
3
|
+
# = Pearson correlation coefficient (r)
|
4
|
+
#
|
5
|
+
# The product-moment Pearson's correlation coefficient, known as 'r'
|
6
|
+
# is a measure of bivariate association between two continuous
|
7
|
+
# variables.
|
8
|
+
#
|
9
|
+
# == Usage
|
10
|
+
# a = [1,2,3,4,5,6].to_scale
|
11
|
+
# b = [2,3,4,5,6,7].to_scale
|
12
|
+
# pearson = Statsample::Bivariate::Pearson.new(a,b)
|
13
|
+
# puts pearson.r
|
14
|
+
# puts pearson.t
|
15
|
+
# puts pearson.probability
|
16
|
+
#
|
17
|
+
# puts pearson.summary
|
18
|
+
#
|
19
|
+
class Pearson
|
20
|
+
|
21
|
+
include Statsample::Test
|
22
|
+
include Summarizable
|
23
|
+
# Name of correlation
|
24
|
+
attr_accessor :name
|
25
|
+
# Tails for probability (:both, :left or :right)
|
26
|
+
attr_accessor :tails
|
27
|
+
attr_accessor :n
|
28
|
+
def initialize(v1,v2,opts=Hash.new)
|
29
|
+
@v1_name,@v2_name = v1.name,v2.name
|
30
|
+
@v1,@v2 = Statsample.only_valid_clone(v1,v2)
|
31
|
+
@n=@v1.size
|
32
|
+
opts_default={
|
33
|
+
:name=>_("Correlation (%s - %s)") % [@v1_name, @v2_name],
|
34
|
+
:tails=>:both
|
35
|
+
}
|
36
|
+
@opts=opts.merge(opts_default)
|
37
|
+
@opts.each{|k,v|
|
38
|
+
self.send("#{k}=",v) if self.respond_to? k
|
39
|
+
}
|
40
|
+
end
|
41
|
+
def r
|
42
|
+
Statsample::Bivariate.pearson(@v1,@v2)
|
43
|
+
end
|
44
|
+
def t
|
45
|
+
Statsample::Bivariate.t_pearson(@v1,@v2)
|
46
|
+
end
|
47
|
+
def probability
|
48
|
+
p_using_cdf(Distribution::T.cdf(t, @v1.size-2), tails)
|
49
|
+
end
|
50
|
+
def report_building(builder)
|
51
|
+
builder.text(_("%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s tails)") % [@name, r,t, (n-2), probability, tails])
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/statsample/dataset.rb
CHANGED
@@ -39,13 +39,15 @@ module Statsample
|
|
39
39
|
#
|
40
40
|
#
|
41
41
|
# ==Usage
|
42
|
-
# Create a empty dataset
|
42
|
+
# Create an empty dataset:
|
43
43
|
# Dataset.new()
|
44
|
-
# Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt
|
44
|
+
# Create a dataset with three empty vectors, called <tt>v1</tt>, <tt>v2</tt> and <tt>v3</tt>:
|
45
45
|
# Dataset.new(%w{v1 v2 v3})
|
46
|
-
# Create a dataset with two vectors
|
46
|
+
# Create a dataset with two vectors, called <tt>v1</tt>
|
47
|
+
# and <tt>v2</tt>:
|
47
48
|
# Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
|
48
|
-
# Create a dataset with two given vectors (v1 and v2),
|
49
|
+
# Create a dataset with two given vectors (v1 and v2),
|
50
|
+
# with vectors in inverted order:
|
49
51
|
# Dataset.new({'v2'=>v2,'v1'=>v1},['v2','v1'])
|
50
52
|
#
|
51
53
|
# The fast way to create a dataset uses Hash#to_dataset, with
|
@@ -59,7 +61,7 @@ module Statsample
|
|
59
61
|
include Summarizable
|
60
62
|
# Hash of Statsample::Vector
|
61
63
|
attr_reader :vectors
|
62
|
-
# Ordered
|
64
|
+
# Ordered ids of vectors
|
63
65
|
attr_reader :fields
|
64
66
|
# Name of dataset
|
65
67
|
attr_accessor:name
|
@@ -67,7 +69,7 @@ module Statsample
|
|
67
69
|
attr_reader :cases
|
68
70
|
# Location of pointer on enumerations methods (like #each)
|
69
71
|
attr_reader :i
|
70
|
-
|
72
|
+
|
71
73
|
# Generates a new dataset, using three vectors
|
72
74
|
# - Rows
|
73
75
|
# - Columns
|
@@ -87,7 +89,8 @@ module Statsample
|
|
87
89
|
# b 1 0
|
88
90
|
#
|
89
91
|
# Useful to process outputs from databases
|
90
|
-
#
|
92
|
+
#
|
93
|
+
|
91
94
|
def self.crosstab_by_asignation(rows,columns,values)
|
92
95
|
raise "Three vectors should be equal size" if rows.size!=columns.size or rows.size!=values.size
|
93
96
|
cols_values=columns.factors
|
@@ -123,7 +126,6 @@ module Statsample
|
|
123
126
|
# [fields] Array of names for vectors. Is only used for set the
|
124
127
|
# order of variables. If empty, vector keys in alphabetical order are
|
125
128
|
# used as fields
|
126
|
-
|
127
129
|
#
|
128
130
|
def initialize(vectors={}, fields=[])
|
129
131
|
@@n_dataset||=0
|
@@ -141,6 +143,10 @@ module Statsample
|
|
141
143
|
end
|
142
144
|
@i=nil
|
143
145
|
end
|
146
|
+
|
147
|
+
#
|
148
|
+
# Returns a GSL::matrix
|
149
|
+
#
|
144
150
|
def to_gsl_matrix
|
145
151
|
matrix=GSL::Matrix.alloc(cases,@vectors.size)
|
146
152
|
each_array do |row|
|
@@ -171,7 +177,7 @@ module Statsample
|
|
171
177
|
end
|
172
178
|
# Returns a duplicate of the Dataset
|
173
179
|
# If fields given, only include those vectors.
|
174
|
-
# Every vector will be dup
|
180
|
+
# Every vector will be duplicated.
|
175
181
|
def dup(*fields_to_include)
|
176
182
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
177
183
|
fields_to_include=fields_to_include[0]
|
@@ -186,6 +192,10 @@ module Statsample
|
|
186
192
|
}
|
187
193
|
Dataset.new(vectors,fields)
|
188
194
|
end
|
195
|
+
# Returns (when possible) a cheap copy of dataset.
|
196
|
+
# If no vector has missing values, returns original vectors.
|
197
|
+
# If missing values are present, uses Dataset.dup_only_valid
|
198
|
+
#
|
189
199
|
def clone_only_valid(*fields_to_include)
|
190
200
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
191
201
|
fields_to_include=fields_to_include[0]
|
@@ -240,7 +250,7 @@ module Statsample
|
|
240
250
|
ds_new.update_valid_data
|
241
251
|
ds_new
|
242
252
|
end
|
243
|
-
|
253
|
+
# Returns a dataset with standardized data
|
244
254
|
def standarize
|
245
255
|
ds=dup()
|
246
256
|
ds.fields.each do |f|
|
@@ -261,15 +271,18 @@ module Statsample
|
|
261
271
|
def ==(d2)
|
262
272
|
@vectors==d2.vectors and @fields==d2.fields
|
263
273
|
end
|
274
|
+
# Returns vector <tt>c</tt>
|
264
275
|
def col(c)
|
265
276
|
@vectors[c]
|
266
277
|
end
|
267
278
|
alias_method :vector, :col
|
268
|
-
|
279
|
+
# Equal to Dataset[<tt>name</tt>]=<tt>vector</tt>
|
280
|
+
def add_vector(name, vector)
|
269
281
|
raise ArgumentError, "Vector have different size" if vector.size!=@cases
|
270
282
|
@vectors[name]=vector
|
271
283
|
check_order
|
272
284
|
end
|
285
|
+
# Returns true if dataset has vector <tt>v</tt>
|
273
286
|
def has_vector? (v)
|
274
287
|
return @vectors.has_key?(v)
|
275
288
|
end
|
@@ -295,8 +308,8 @@ module Statsample
|
|
295
308
|
# * Hash: keys equal to fields
|
296
309
|
# If uvd is false, #update_valid_data is not executed after
|
297
310
|
# inserting a case. This is very useful if you want to increase the
|
298
|
-
# performance on inserting many cases,
|
299
|
-
#
|
311
|
+
# performance on inserting many cases, because #update_valid_data
|
312
|
+
# performs check on vectors and on the dataset
|
300
313
|
|
301
314
|
def add_case(v,uvd=true)
|
302
315
|
case v
|
@@ -323,7 +336,7 @@ module Statsample
|
|
323
336
|
@fields.each{|f| @vectors[f].set_valid_data}
|
324
337
|
check_length
|
325
338
|
end
|
326
|
-
# Delete
|
339
|
+
# Delete vector named <tt>name</tt>.
|
327
340
|
def delete_vector(name)
|
328
341
|
@fields.delete(name)
|
329
342
|
@vectors.delete(name)
|
@@ -345,26 +358,27 @@ module Statsample
|
|
345
358
|
add_vector(name+join+k,v)
|
346
359
|
}
|
347
360
|
end
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
361
|
+
|
362
|
+
def vector_by_calculation(type=:scale)
|
363
|
+
a=[]
|
364
|
+
each do |row|
|
365
|
+
a.push(yield(row))
|
366
|
+
end
|
367
|
+
a.to_vector(type)
|
368
|
+
end
|
369
|
+
# Returns a vector with the sum of fields
|
370
|
+
# If the fields parameter is nil, sums all fields
|
371
|
+
def vector_sum(fields=nil)
|
372
|
+
a=[]
|
373
|
+
fields||=@fields
|
374
|
+
collect_with_index do |row, i|
|
375
|
+
if(fields.find{|f| !@vectors[f].data_with_nils[i]})
|
376
|
+
nil
|
377
|
+
else
|
378
|
+
fields.inject(0) {|ac,v| ac + row[v].to_f}
|
379
|
+
end
|
380
|
+
end
|
381
|
+
end
|
368
382
|
# Check if #fields attribute is correct, after inserting or deleting vectors
|
369
383
|
def check_fields(fields)
|
370
384
|
fields||=@fields
|
@@ -614,14 +628,14 @@ module Statsample
|
|
614
628
|
ds
|
615
629
|
end
|
616
630
|
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
631
|
+
# Creates a new vector with the data of a given field for which the block returns true
|
632
|
+
def filter_field(field)
|
633
|
+
a=[]
|
634
|
+
each do |c|
|
635
|
+
a.push(c[field]) if yield c
|
636
|
+
end
|
637
|
+
a.to_vector(@vectors[field].type)
|
638
|
+
end
|
625
639
|
|
626
640
|
def to_multiset_by_split_one_field(field)
|
627
641
|
raise ArgumentError,"Should use a correct field name" if !@fields.include? field
|
@@ -802,8 +816,7 @@ module Statsample
|
|
802
816
|
ds.update_valid_data
|
803
817
|
ds
|
804
818
|
end
|
805
|
-
|
806
|
-
def report_building(b)
|
819
|
+
def report_building(b)
|
807
820
|
b.section(:name=>@name) do |g|
|
808
821
|
g.text _"Cases: %d" % cases
|
809
822
|
@fields.each do |f|
|
@@ -811,11 +824,6 @@ module Statsample
|
|
811
824
|
g.parse_element(@vectors[f])
|
812
825
|
end
|
813
826
|
end
|
814
|
-
end
|
815
|
-
def as_r
|
816
|
-
require 'rsruby/dataframe'
|
817
|
-
r=RSRuby.instance
|
818
|
-
|
819
827
|
end
|
820
828
|
end
|
821
829
|
end
|