statsample 1.5.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +15 -0
- data/.gitignore +1 -0
- data/.travis.yml +19 -7
- data/CONTRIBUTING.md +33 -0
- data/History.txt +5 -0
- data/README.md +41 -53
- data/benchmarks/correlation_matrix_15_variables.rb +6 -5
- data/benchmarks/correlation_matrix_5_variables.rb +6 -5
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
- data/examples/boxplot.rb +17 -5
- data/examples/correlation_matrix.rb +36 -7
- data/examples/dataset.rb +25 -5
- data/examples/dominance_analysis.rb +8 -7
- data/examples/dominance_analysis_bootstrap.rb +16 -11
- data/examples/histogram.rb +16 -2
- data/examples/icc.rb +5 -6
- data/examples/levene.rb +17 -3
- data/examples/multiple_regression.rb +6 -3
- data/examples/parallel_analysis.rb +11 -6
- data/examples/polychoric.rb +26 -13
- data/examples/principal_axis.rb +8 -4
- data/examples/reliability.rb +10 -10
- data/examples/scatterplot.rb +8 -0
- data/examples/t_test.rb +7 -0
- data/examples/u_test.rb +10 -2
- data/examples/vector.rb +9 -6
- data/examples/velicer_map_test.rb +12 -8
- data/lib/statsample.rb +13 -47
- data/lib/statsample/analysis/suite.rb +1 -1
- data/lib/statsample/anova/oneway.rb +6 -6
- data/lib/statsample/anova/twoway.rb +26 -24
- data/lib/statsample/bivariate.rb +78 -61
- data/lib/statsample/bivariate/pearson.rb +2 -2
- data/lib/statsample/codification.rb +45 -32
- data/lib/statsample/converter/csv.rb +15 -53
- data/lib/statsample/converter/spss.rb +6 -5
- data/lib/statsample/converters.rb +50 -211
- data/lib/statsample/crosstab.rb +26 -25
- data/lib/statsample/daru.rb +117 -0
- data/lib/statsample/dataset.rb +70 -942
- data/lib/statsample/dominanceanalysis.rb +16 -17
- data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
- data/lib/statsample/factor/parallelanalysis.rb +17 -19
- data/lib/statsample/factor/pca.rb +21 -20
- data/lib/statsample/factor/principalaxis.rb +3 -3
- data/lib/statsample/graph/boxplot.rb +8 -16
- data/lib/statsample/graph/histogram.rb +4 -4
- data/lib/statsample/graph/scatterplot.rb +8 -7
- data/lib/statsample/histogram.rb +128 -119
- data/lib/statsample/matrix.rb +20 -16
- data/lib/statsample/multiset.rb +39 -38
- data/lib/statsample/regression.rb +3 -3
- data/lib/statsample/regression/multiple.rb +8 -10
- data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
- data/lib/statsample/regression/multiple/baseengine.rb +32 -32
- data/lib/statsample/regression/multiple/gslengine.rb +33 -36
- data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
- data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
- data/lib/statsample/reliability.rb +23 -25
- data/lib/statsample/reliability/icc.rb +8 -7
- data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
- data/lib/statsample/reliability/scaleanalysis.rb +58 -60
- data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
- data/lib/statsample/resample.rb +1 -1
- data/lib/statsample/shorthand.rb +29 -25
- data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
- data/lib/statsample/test/levene.rb +28 -27
- data/lib/statsample/test/t.rb +7 -9
- data/lib/statsample/test/umannwhitney.rb +28 -28
- data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
- data/lib/statsample/vector.rb +70 -1013
- data/lib/statsample/version.rb +1 -1
- data/statsample.gemspec +12 -16
- data/test/helpers_tests.rb +1 -1
- data/test/test_analysis.rb +17 -17
- data/test/test_anova_contrast.rb +6 -6
- data/test/test_anovatwowaywithdataset.rb +8 -8
- data/test/test_anovawithvectors.rb +8 -8
- data/test/test_awesome_print_bug.rb +1 -1
- data/test/test_bartlettsphericity.rb +4 -4
- data/test/test_bivariate.rb +48 -43
- data/test/test_codification.rb +33 -33
- data/test/test_crosstab.rb +9 -9
- data/test/test_dataset.rb +28 -458
- data/test/test_factor.rb +46 -38
- data/test/test_factor_pa.rb +22 -13
- data/test/test_ggobi.rb +4 -4
- data/test/test_gsl.rb +4 -4
- data/test/test_histogram.rb +3 -3
- data/test/test_matrix.rb +13 -13
- data/test/test_multiset.rb +103 -91
- data/test/test_regression.rb +57 -52
- data/test/test_reliability.rb +55 -45
- data/test/test_reliability_icc.rb +8 -8
- data/test/test_reliability_skillscale.rb +26 -24
- data/test/test_resample.rb +1 -1
- data/test/test_statistics.rb +3 -13
- data/test/test_stest.rb +9 -9
- data/test/test_stratified.rb +3 -3
- data/test/test_test_t.rb +12 -12
- data/test/test_umannwhitney.rb +2 -2
- data/test/test_vector.rb +76 -613
- data/test/test_wilcoxonsignedrank.rb +4 -4
- metadata +57 -28
- data/lib/statsample/rserve_extension.rb +0 -20
- data/lib/statsample/vector/gsl.rb +0 -106
- data/test/fixtures/repeated_fields.csv +0 -7
- data/test/fixtures/scientific_notation.csv +0 -4
- data/test/fixtures/test_csv.csv +0 -7
- data/test/fixtures/test_xls.xls +0 -0
- data/test/test_csv.rb +0 -63
- data/test/test_rserve_extension.rb +0 -42
- data/test/test_xls.rb +0 -52
@@ -6,10 +6,10 @@ module Statsample
|
|
6
6
|
#
|
7
7
|
# == Usage
|
8
8
|
# === Svg output
|
9
|
-
#
|
10
|
-
#
|
9
|
+
# a = Daru::Vector.new([1,2,3,4])
|
10
|
+
# puts Statsample::Graph::Histogram.new(a).to_svg
|
11
11
|
# === Using ReportBuilder
|
12
|
-
# a=[1,2,3,4]
|
12
|
+
# a = Daru::Vector.new([1,2,3,4])
|
13
13
|
# rb=ReportBuilder.new
|
14
14
|
# rb.add(Statsample::Graph::Histogram.new(a))
|
15
15
|
# rb.save_html('histogram.html')
|
@@ -70,7 +70,7 @@ module Statsample
|
|
70
70
|
@hist=@data
|
71
71
|
@mean=@hist.estimated_mean
|
72
72
|
@sd=@hist.estimated_standard_deviation
|
73
|
-
elsif @data.is_a?
|
73
|
+
elsif @data.is_a? Daru::Vector
|
74
74
|
@mean=@data.mean
|
75
75
|
@sd=@data.sd
|
76
76
|
@bins||=Math::sqrt(@data.size).floor
|
@@ -10,12 +10,12 @@ module Statsample
|
|
10
10
|
# The data is displayed as a collection of points, each having the value of one variable determining the position on the horizontal axis and the value of the other variable determining the position on the vertical axis.[2] This kind of plot is also called a scatter chart, scatter diagram and scatter graph.
|
11
11
|
# == Usage
|
12
12
|
# === Svg output
|
13
|
-
# a=[1,2,3,4]
|
14
|
-
# b=[3,4,5,6]
|
13
|
+
# a = Daru::Vector.new([1,2,3,4])
|
14
|
+
# b = Daru::Vector.new([3,4,5,6])
|
15
15
|
# puts Statsample::Graph::Scatterplot.new(a,b).to_svg
|
16
16
|
# === Using ReportBuilder
|
17
|
-
# a=[1,2,3,4]
|
18
|
-
# b=[3,4,5,6]
|
17
|
+
# a = Daru::Vector.new([1,2,3,4])
|
18
|
+
# b = Daru::Vector.new([3,4,5,6])
|
19
19
|
# rb=ReportBuilder.new
|
20
20
|
# rb.add(Statsample::Graph::Scatterplot.new(a,b))
|
21
21
|
# rb.save_html('scatter.html')
|
@@ -195,17 +195,18 @@ module Statsample
|
|
195
195
|
end
|
196
196
|
vis
|
197
197
|
end
|
198
|
+
|
198
199
|
# Returns SVG with scatterplot
|
199
200
|
def to_svg
|
200
|
-
rp=rubyvis_panel
|
201
|
+
rp = rubyvis_panel
|
201
202
|
rp.render
|
202
203
|
rp.to_svg
|
203
204
|
end
|
205
|
+
|
204
206
|
def report_building(builder) # :nodoc:
|
205
207
|
builder.section(:name=>name) do |b|
|
206
208
|
b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
|
207
|
-
end
|
208
|
-
|
209
|
+
end
|
209
210
|
end
|
210
211
|
end
|
211
212
|
end
|
data/lib/statsample/histogram.rb
CHANGED
@@ -37,135 +37,144 @@ module Statsample
|
|
37
37
|
# == Reference:
|
38
38
|
# * http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
end
|
48
|
-
# Alloc +n_bins+ bins, using +p1+ as minimum and +p2+
|
49
|
-
# as maximum
|
50
|
-
def alloc_uniform(n_bins, p1=nil,p2=nil)
|
51
|
-
if p1.is_a? Array
|
52
|
-
min,max=p1
|
53
|
-
else
|
54
|
-
min,max=p1,p2
|
55
|
-
end
|
56
|
-
range=max - min
|
57
|
-
step=range / n_bins.to_f
|
58
|
-
range=(n_bins+1).times.map {|i| min + (step*i)}
|
59
|
-
Histogram.new(range)
|
60
|
-
end
|
61
|
-
end
|
62
|
-
attr_accessor :name
|
63
|
-
attr_reader :bin
|
64
|
-
attr_reader :range
|
65
|
-
include GetText
|
66
|
-
bindtextdomain("statsample")
|
67
|
-
def initialize(p1, min_max=false, opts=Hash.new)
|
40
|
+
class Histogram
|
41
|
+
include Enumerable
|
42
|
+
|
43
|
+
class << self
|
44
|
+
# Alloc +n_bins+, using +range+ as ranges of bins
|
45
|
+
def alloc(n_bins, range=nil, opts=Hash.new)
|
46
|
+
Histogram.new(n_bins, range, opts)
|
68
47
|
|
48
|
+
end
|
49
|
+
# Alloc +n_bins+ bins, using +p1+ as minimum and +p2+
|
50
|
+
# as maximum
|
51
|
+
def alloc_uniform(n_bins, p1=nil,p2=nil)
|
69
52
|
if p1.is_a? Array
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
@n_bins=p1
|
53
|
+
min,max=p1
|
54
|
+
else
|
55
|
+
min,max=p1,p2
|
74
56
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
range=Array.new(@n_bins+1)
|
80
|
-
(@n_bins+1).times {|i| range[i]=min+(i*(max-min).quo(@n_bins)) }
|
81
|
-
end
|
82
|
-
range||=[0.0]*(@n_bins+1)
|
83
|
-
set_ranges(range)
|
84
|
-
@name=""
|
85
|
-
opts.each{|k,v|
|
86
|
-
self.send("#{k}=",v) if self.respond_to? k
|
87
|
-
}
|
57
|
+
range=max - min
|
58
|
+
step=range / n_bins.to_f
|
59
|
+
range=(n_bins+1).times.map {|i| min + (step*i)}
|
60
|
+
Histogram.new(range)
|
88
61
|
end
|
89
|
-
|
90
|
-
|
91
|
-
|
62
|
+
end
|
63
|
+
|
64
|
+
attr_accessor :name
|
65
|
+
attr_reader :bin
|
66
|
+
attr_reader :range
|
67
|
+
|
68
|
+
include GetText
|
69
|
+
bindtextdomain("statsample")
|
70
|
+
|
71
|
+
def initialize(p1, min_max=false, opts=Hash.new)
|
72
|
+
|
73
|
+
if p1.is_a? Array
|
74
|
+
range=p1
|
75
|
+
@n_bins=p1.size-1
|
76
|
+
elsif p1.is_a? Integer
|
77
|
+
@n_bins=p1
|
92
78
|
end
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
79
|
+
|
80
|
+
@bin=[0.0]*(@n_bins)
|
81
|
+
if(min_max)
|
82
|
+
min, max=min_max[0], min_max[1]
|
83
|
+
range=Array.new(@n_bins+1)
|
84
|
+
(@n_bins+1).times {|i| range[i]=min+(i*(max-min).quo(@n_bins)) }
|
85
|
+
end
|
86
|
+
range||=[0.0]*(@n_bins+1)
|
87
|
+
set_ranges(range)
|
88
|
+
@name=""
|
89
|
+
opts.each{|k,v|
|
90
|
+
self.send("#{k}=",v) if self.respond_to? k
|
91
|
+
}
|
92
|
+
end
|
93
|
+
|
94
|
+
# Number of bins
|
95
|
+
def bins
|
96
|
+
@n_bins
|
97
|
+
end
|
98
|
+
|
99
|
+
def increment(x, w=1)
|
100
|
+
if x.respond_to? :each
|
101
|
+
x.each{|y| increment(y,w) }
|
102
|
+
elsif x.is_a? Numeric
|
103
|
+
(range.size - 1).times do |i|
|
104
|
+
if x >= range[i] and x < range[i+1]
|
105
|
+
@bin[i] += w
|
106
|
+
break
|
103
107
|
end
|
104
108
|
end
|
105
109
|
end
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
each do |v|
|
136
|
-
sum+=v[:value]*(v[:middle]-mean)**2
|
137
|
-
n+=v[:value]
|
138
|
-
end
|
139
|
-
sum / (n-1)
|
140
|
-
end
|
141
|
-
def estimated_standard_deviation
|
142
|
-
Math::sqrt(estimated_variance)
|
143
|
-
end
|
144
|
-
def estimated_mean
|
145
|
-
sum,n=0,0
|
146
|
-
each do |v|
|
147
|
-
sum+= v[:value]* v[:middle]
|
148
|
-
n+=v[:value]
|
149
|
-
end
|
150
|
-
sum / n
|
151
|
-
end
|
152
|
-
alias :mean :estimated_mean
|
153
|
-
alias :sigma :estimated_standard_deviation
|
154
|
-
|
155
|
-
def sum(start=nil,_end=nil)
|
156
|
-
start||=0
|
157
|
-
_end||=@n_bins-1
|
158
|
-
(start.._end).inject(0) {|ac,i| ac+@bin[i]}
|
110
|
+
end
|
111
|
+
|
112
|
+
def set_ranges(range)
|
113
|
+
raise "Range size should be bin+1" if range.size!=@bin.size+1
|
114
|
+
@range=range
|
115
|
+
end
|
116
|
+
|
117
|
+
def get_range(i)
|
118
|
+
[@range[i],@range[i+1]]
|
119
|
+
end
|
120
|
+
|
121
|
+
def max
|
122
|
+
@range.last
|
123
|
+
end
|
124
|
+
|
125
|
+
def min
|
126
|
+
@range.first
|
127
|
+
end
|
128
|
+
def max_val
|
129
|
+
@bin.max
|
130
|
+
end
|
131
|
+
def min_val
|
132
|
+
@bin.min
|
133
|
+
end
|
134
|
+
def each
|
135
|
+
bins.times.each do |i|
|
136
|
+
r=get_range(i)
|
137
|
+
arg={:i=>i, :low=>r[0],:high=>r[1], :middle=>(r[0]+r[1]) / 2.0, :value=>@bin[i]}
|
138
|
+
yield arg
|
159
139
|
end
|
160
|
-
|
161
|
-
|
162
|
-
|
140
|
+
end
|
141
|
+
def estimated_variance
|
142
|
+
sum,n=0,0
|
143
|
+
mean=estimated_mean
|
144
|
+
each do |v|
|
145
|
+
sum+=v[:value]*(v[:middle]-mean)**2
|
146
|
+
n+=v[:value]
|
147
|
+
end
|
148
|
+
sum / (n-1)
|
149
|
+
end
|
150
|
+
def estimated_standard_deviation
|
151
|
+
Math::sqrt(estimated_variance)
|
152
|
+
end
|
153
|
+
def estimated_mean
|
154
|
+
sum,n=0,0
|
155
|
+
each do |v|
|
156
|
+
sum+= v[:value]* v[:middle]
|
157
|
+
n+=v[:value]
|
163
158
|
end
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
159
|
+
sum / n
|
160
|
+
end
|
161
|
+
alias :mean :estimated_mean
|
162
|
+
alias :sigma :estimated_standard_deviation
|
163
|
+
|
164
|
+
def sum(start=nil,_end=nil)
|
165
|
+
start||=0
|
166
|
+
_end||=@n_bins-1
|
167
|
+
(start.._end).inject(0) {|ac,i| ac+@bin[i]}
|
168
|
+
end
|
169
|
+
def report_building(generator)
|
170
|
+
hg=Statsample::Graph::Histogram.new(self)
|
171
|
+
generator.parse_element(hg)
|
172
|
+
end
|
173
|
+
def report_building_text(generator)
|
174
|
+
@range.each_with_index do |r,i|
|
175
|
+
next if i==@bin.size
|
176
|
+
generator.text(sprintf("%5.2f : %d", r, @bin[i]))
|
169
177
|
end
|
170
178
|
end
|
179
|
+
end
|
171
180
|
end
|
data/lib/statsample/matrix.rb
CHANGED
@@ -11,21 +11,23 @@ class ::Matrix
|
|
11
11
|
self
|
12
12
|
end
|
13
13
|
|
14
|
-
def
|
15
|
-
f = (self.respond_to? :fields_y) ? fields_y : column_size.times.map {|i|
|
16
|
-
|
14
|
+
def to_dataframe
|
15
|
+
f = (self.respond_to? :fields_y) ? fields_y : column_size.times.map {|i| "VAR_#{i+1}".to_sym }
|
16
|
+
f = [f] unless f.is_a?(Array)
|
17
|
+
ds = Daru::DataFrame.new({}, order: f)
|
17
18
|
f.each do |ff|
|
18
|
-
ds[ff].
|
19
|
-
ds[ff].name=ff
|
19
|
+
ds[ff].rename ff
|
20
20
|
end
|
21
21
|
row_size.times {|i|
|
22
|
-
ds.
|
22
|
+
ds.add_row(self.row(i).to_a)
|
23
23
|
}
|
24
|
-
ds.
|
25
|
-
ds.
|
24
|
+
ds.update
|
25
|
+
ds.rename(self.name) if self.respond_to? :name
|
26
26
|
ds
|
27
27
|
end
|
28
28
|
|
29
|
+
alias :to_dataset :to_dataframe
|
30
|
+
|
29
31
|
if defined? :eigenpairs
|
30
32
|
alias_method :eigenpairs_ruby, :eigenpairs
|
31
33
|
end
|
@@ -83,21 +85,23 @@ module GSL
|
|
83
85
|
self
|
84
86
|
end
|
85
87
|
|
86
|
-
def
|
87
|
-
f = (self.respond_to? :fields_y) ? fields_y : column_size.times.map {|i|
|
88
|
-
ds=
|
88
|
+
def to_dataframe
|
89
|
+
f = (self.respond_to? :fields_y) ? fields_y : column_size.times.map { |i| "VAR_#{i+1}".to_sym }
|
90
|
+
ds=Daru::DataFrame.new({}, order: f)
|
89
91
|
f.each do |ff|
|
90
|
-
ds[ff].
|
91
|
-
ds[ff].name=ff
|
92
|
+
ds[ff].rename ff
|
92
93
|
end
|
94
|
+
|
93
95
|
row_size.times {|i|
|
94
|
-
ds.
|
96
|
+
ds.add_row(self.row(i).to_a)
|
95
97
|
}
|
96
|
-
ds.
|
97
|
-
ds.
|
98
|
+
ds.update
|
99
|
+
ds.rename(self.name) if self.respond_to? :name
|
98
100
|
ds
|
99
101
|
end
|
100
102
|
|
103
|
+
alias :to_dataset :to_dataframe
|
104
|
+
|
101
105
|
def row_size
|
102
106
|
size1
|
103
107
|
end
|
data/lib/statsample/multiset.rb
CHANGED
@@ -5,20 +5,21 @@ module Statsample
|
|
5
5
|
class Multiset
|
6
6
|
# Name of fields
|
7
7
|
attr_reader :fields
|
8
|
-
# Array with
|
8
|
+
# Array with Daru::DataFrame
|
9
9
|
attr_reader :datasets
|
10
10
|
# To create a multiset
|
11
11
|
# * Multiset.new(%w{f1 f2 f3}) # define only fields
|
12
12
|
def initialize(fields)
|
13
|
-
|
14
|
-
|
13
|
+
@fields=fields
|
14
|
+
@datasets={}
|
15
15
|
end
|
16
16
|
def self.new_empty_vectors(fields,ds_names)
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
17
|
+
ms = Multiset.new(fields)
|
18
|
+
ds_names.each do |d|
|
19
|
+
ms.add_dataset(d, Daru::DataFrame.new({}, order: fields))
|
20
|
+
end
|
21
|
+
|
22
|
+
ms
|
22
23
|
end
|
23
24
|
# Generate a new dataset as a union of partial dataset
|
24
25
|
# If block given, this is applied to each dataset before union
|
@@ -29,65 +30,65 @@ module Statsample
|
|
29
30
|
labels={}
|
30
31
|
each do |k,ds|
|
31
32
|
if block
|
32
|
-
ds=ds.dup
|
33
|
+
ds = ds.dup
|
33
34
|
yield k,ds
|
34
35
|
end
|
35
36
|
@fields.each do |f|
|
36
|
-
union_field[f]||=Array.new
|
37
|
-
union_field[f].concat(ds[f].
|
38
|
-
types[f]||=ds[f].type
|
39
|
-
names[f]||=ds[f].name
|
40
|
-
labels[f]||=ds[f].
|
37
|
+
union_field[f] ||= Array.new
|
38
|
+
union_field[f].concat(ds[f].to_a)
|
39
|
+
types[f] ||= ds[f].type
|
40
|
+
names[f] ||= ds[f].name
|
41
|
+
labels[f] ||= ds[f].index.to_a
|
41
42
|
end
|
42
43
|
end
|
43
44
|
|
44
45
|
@fields.each do |f|
|
45
|
-
union_field[f]=
|
46
|
-
union_field[f].
|
47
|
-
union_field[f].labels=labels[f]
|
46
|
+
union_field[f] = Daru::Vector.new(union_field[f])
|
47
|
+
union_field[f].rename names[f]
|
48
48
|
end
|
49
|
-
|
50
|
-
ds_union.fields
|
49
|
+
|
50
|
+
ds_union = Daru::DataFrame.new(union_field, order: @fields)
|
51
51
|
ds_union
|
52
52
|
end
|
53
|
+
|
53
54
|
def datasets_names
|
54
|
-
|
55
|
+
@datasets.keys.sort
|
55
56
|
end
|
57
|
+
|
56
58
|
def n_datasets
|
57
|
-
|
59
|
+
@datasets.size
|
58
60
|
end
|
61
|
+
|
59
62
|
def add_dataset(key,ds)
|
60
|
-
if
|
61
|
-
|
63
|
+
if ds.vectors.to_a != @fields
|
64
|
+
raise ArgumentError, "Dataset(#{ds.vectors.to_a.to_s})must have the same fields of the Multiset(#{@fields})"
|
62
65
|
else
|
63
|
-
|
66
|
+
@datasets[key] = ds
|
64
67
|
end
|
65
68
|
end
|
66
69
|
def sum_field(field)
|
67
70
|
@datasets.inject(0) {|a,da|
|
68
|
-
stratum_name=da[0]
|
69
|
-
vector=da[1][field]
|
70
|
-
val=yield stratum_name,vector
|
71
|
-
a+val
|
71
|
+
stratum_name = da[0]
|
72
|
+
vector = da[1][field]
|
73
|
+
val = yield stratum_name,vector
|
74
|
+
a + val
|
72
75
|
}
|
73
76
|
end
|
74
77
|
def collect_vector(field)
|
75
|
-
@datasets.collect {|k,v|
|
76
|
-
yield k, v[field]
|
77
|
-
}
|
78
|
+
@datasets.collect { |k,v| yield k, v[field] }
|
78
79
|
end
|
79
80
|
|
80
81
|
def each_vector(field)
|
81
|
-
@datasets.each {|k,v|
|
82
|
-
yield k, v[field]
|
83
|
-
}
|
82
|
+
@datasets.each { |k,v| yield k, v[field] }
|
84
83
|
end
|
85
|
-
|
84
|
+
|
85
|
+
def [](i)
|
86
86
|
@datasets[i]
|
87
87
|
end
|
88
|
+
|
88
89
|
def each(&block)
|
89
90
|
@datasets.each {|k,ds|
|
90
|
-
next if ds.
|
91
|
+
next if ds.nrows == 0
|
91
92
|
block.call(k,ds)
|
92
93
|
}
|
93
94
|
end
|
@@ -204,9 +205,9 @@ module Statsample
|
|
204
205
|
@ms=ms
|
205
206
|
raise ArgumentError,"You should put a strata size for each dataset" if strata_sizes.keys.sort!=ms.datasets_names
|
206
207
|
@strata_sizes=strata_sizes
|
207
|
-
@population_size=@strata_sizes.inject(0) {|a,x| a+x[1]}
|
208
|
+
@population_size=@strata_sizes.inject(0) { |a,x| a+x[1] }
|
208
209
|
@strata_number=@ms.n_datasets
|
209
|
-
@sample_size=@ms.datasets.inject(0) {|a,x| a+x[1].
|
210
|
+
@sample_size=@ms.datasets.inject(0) { |a,x| a+x[1].nrows }
|
210
211
|
end
|
211
212
|
# Number of strata
|
212
213
|
def strata_number
|