statsample 0.15.0 → 0.15.1
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +7 -0
- data/Rakefile +4 -2
- data/lib/statsample.rb +1 -1
- data/lib/statsample/converters.rb +12 -9
- data/lib/statsample/dataset.rb +53 -24
- data/lib/statsample/reliability.rb +3 -0
- data/lib/statsample/reliability/icc.rb +11 -0
- data/lib/statsample/reliability/scaleanalysis.rb +72 -14
- data/lib/statsample/vector.rb +5 -2
- data/references.txt +1 -1
- data/test/test_dataset.rb +4 -0
- data/test/test_reliability_icc.rb +17 -5
- data/test/test_rserve_extension.rb +3 -0
- data/test/test_vector.rb +8 -4
- metadata +7 -7
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
=== 0.15.1 / 2010-10-20
|
2
|
+
* Statsample::Excel and Statsample::PlainText add name to vectors equal to field name
|
3
|
+
* Statsample::Dataset.delete_vector accepts multiple fields.
|
4
|
+
* Statsample::Dataset.dup_only_valid allows duplication of specific fields
|
5
|
+
* ScaleAnalysis doesn't crash on one-item scales
|
6
|
+
* Updated references
|
7
|
+
|
1
8
|
=== 0.15.0 / 2010-09-07
|
2
9
|
* Added class Statsample::Reliability::ICC for calculation of Intra-class correlation (Shrout & Fleiss, 1979; McGraw & Wong, 1996). Tested with SPSS and R values.
|
3
10
|
* References: Updated and standardized references on many classes. Added grab_references.rb script, to create a list of references for the library
|
data/Rakefile
CHANGED
@@ -22,6 +22,7 @@ end
|
|
22
22
|
task :release do
|
23
23
|
system %{git push origin master}
|
24
24
|
end
|
25
|
+
|
25
26
|
desc "Update pot/po files."
|
26
27
|
task "gettext:updatepo" do
|
27
28
|
require 'gettext/tools'
|
@@ -48,14 +49,14 @@ h=Hoe.spec('statsample') do
|
|
48
49
|
***************************************************
|
49
50
|
Thanks for installing statsample.
|
50
51
|
|
51
|
-
On *nix, you
|
52
|
+
On *nix, you could install statsample-optimization
|
52
53
|
to retrieve gems gsl, statistics2 and a C extension
|
53
54
|
to speed some methods.
|
54
55
|
|
55
56
|
$ sudo gem install statsample-optimization
|
56
57
|
|
57
58
|
On Ubuntu, install build-essential and libgsl0-dev
|
58
|
-
using apt-get
|
59
|
+
using apt-get. Compile ruby 1.8 or 1.9 from
|
59
60
|
source code first.
|
60
61
|
|
61
62
|
$ sudo apt-get install build-essential libgsl0-dev
|
@@ -106,4 +107,5 @@ task :publicar_docs => [:clean, :docs] do
|
|
106
107
|
}
|
107
108
|
sh %{rsync #{h.rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
|
108
109
|
end
|
110
|
+
|
109
111
|
# vim: syntax=Ruby
|
data/lib/statsample.rb
CHANGED
@@ -76,18 +76,15 @@ module Statsample
|
|
76
76
|
class SpreadsheetBase
|
77
77
|
class << self
|
78
78
|
def extract_fields(row)
|
79
|
-
=
|
80
|
-
fields=
|
81
|
-
row.to_a.collect {|c|
|
79
|
+
i=0;
|
80
|
+
fields=row.to_a.collect{|c|
|
82
81
|
if c.nil?
|
83
|
-
|
82
|
+
i+=1
|
83
|
+
"var%05d" % i
|
84
84
|
else
|
85
|
-
|
86
|
-
end
|
85
|
+
c.to_s.downcase
|
86
|
+
end
|
87
87
|
}
|
88
|
-
=end
|
89
|
-
raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.size>0
|
90
|
-
fields=row.to_a.collect{|c| c.downcase}
|
91
88
|
fields.recode_repeated
|
92
89
|
end
|
93
90
|
|
@@ -132,6 +129,9 @@ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all
|
|
132
129
|
end
|
133
130
|
convert_to_scale_and_date(ds,fields)
|
134
131
|
ds.update_valid_data
|
132
|
+
fields.each {|f|
|
133
|
+
ds[f].name=f
|
134
|
+
}
|
135
135
|
ds
|
136
136
|
end
|
137
137
|
end
|
@@ -220,6 +220,9 @@ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all
|
|
220
220
|
end
|
221
221
|
convert_to_scale_and_date(ds, fields)
|
222
222
|
ds.update_valid_data
|
223
|
+
fields.each {|f|
|
224
|
+
ds[f].name=f
|
225
|
+
}
|
223
226
|
ds
|
224
227
|
end
|
225
228
|
end
|
data/lib/statsample/dataset.rb
CHANGED
@@ -64,7 +64,7 @@ module Statsample
|
|
64
64
|
# Ordered ids of vectors
|
65
65
|
attr_reader :fields
|
66
66
|
# Name of dataset
|
67
|
-
attr_accessor:name
|
67
|
+
attr_accessor :name
|
68
68
|
# Number of cases
|
69
69
|
attr_reader :cases
|
70
70
|
# Location of pointer on enumerations methods (like #each)
|
@@ -157,27 +157,30 @@ module Statsample
|
|
157
157
|
|
158
158
|
# Creates a copy of the given dataset, deleting all the cases with
|
159
159
|
# missing data on one of the vectors
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
160
|
+
# @param fields_to_include array of fields to include. If no value is given, all fields are included
|
161
|
+
def dup_only_valid(*fields_to_include)
|
162
|
+
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
163
|
+
fields_to_include=fields_to_include[0]
|
164
|
+
end
|
165
|
+
fields_to_include=@fields if fields_to_include.size==0
|
166
|
+
if fields_to_include.any? {|f| @vectors[f].has_missing_data?}
|
167
|
+
ds=Dataset.new(fields_to_include)
|
168
|
+
fields_to_include.each {|f| ds[f].type=@vectors[f].type}
|
169
|
+
each {|row|
|
170
|
+
unless fields_to_include.any? {|f| @vectors[f].has_missing_data? and !@vectors[f].is_valid? row[f]}
|
171
|
+
row_2=fields_to_include.inject({}) {|ac,v| ac[v]=row[v]; ac}
|
172
|
+
ds.add_case(row_2)
|
173
|
+
end
|
165
174
|
}
|
166
|
-
ds.update_valid_data
|
167
175
|
else
|
168
|
-
ds=dup
|
176
|
+
ds=dup fields_to_include
|
169
177
|
end
|
170
178
|
ds
|
171
179
|
end
|
172
|
-
|
173
|
-
def from_to(from,to)
|
174
|
-
raise ArgumentError, "Field #{from} should be on dataset" if !@fields.include? from
|
175
|
-
raise ArgumentError, "Field #{to} should be on dataset" if !@fields.include? to
|
176
|
-
@fields.slice(@fields.index(from)..@fields.index(to))
|
177
|
-
end
|
180
|
+
|
178
181
|
# Returns a duplicate of the Dataset
|
179
182
|
# If fields given, only include those vectors.
|
180
|
-
#
|
183
|
+
# @param array of fields to include. No value include all fields
|
181
184
|
def dup(*fields_to_include)
|
182
185
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
183
186
|
fields_to_include=fields_to_include[0]
|
@@ -192,10 +195,20 @@ module Statsample
|
|
192
195
|
}
|
193
196
|
Dataset.new(vectors,fields)
|
194
197
|
end
|
198
|
+
|
199
|
+
|
200
|
+
# Returns an array with the fields from first argument to last argument
|
201
|
+
def from_to(from,to)
|
202
|
+
raise ArgumentError, "Field #{from} should be on dataset" if !@fields.include? from
|
203
|
+
raise ArgumentError, "Field #{to} should be on dataset" if !@fields.include? to
|
204
|
+
@fields.slice(@fields.index(from)..@fields.index(to))
|
205
|
+
end
|
206
|
+
|
195
207
|
# Returns (when possible) a cheap copy of dataset.
|
196
208
|
# If no vector has missing values, returns original vectors.
|
197
209
|
# If missing values are present, uses Dataset.dup_only_valid
|
198
|
-
#
|
210
|
+
# @param array of fields to include. No value include all fields
|
211
|
+
|
199
212
|
def clone_only_valid(*fields_to_include)
|
200
213
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
201
214
|
fields_to_include=fields_to_include[0]
|
@@ -209,6 +222,7 @@ module Statsample
|
|
209
222
|
end
|
210
223
|
# Returns a shallow copy of Dataset.
|
211
224
|
# Object id will be distinct, but @vectors will be the same.
|
225
|
+
# @param array of fields to include. No value include all fields
|
212
226
|
def clone(*fields_to_include)
|
213
227
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
214
228
|
fields_to_include=fields_to_include[0]
|
@@ -267,7 +281,7 @@ module Statsample
|
|
267
281
|
}
|
268
282
|
Matrix.rows(rows)
|
269
283
|
end
|
270
|
-
# We have the same datasets if vectors and fields are the same
|
284
|
+
# We have the same datasets if +vectors+ and +fields+ are the same
|
271
285
|
def ==(d2)
|
272
286
|
@vectors==d2.vectors and @fields==d2.fields
|
273
287
|
end
|
@@ -336,10 +350,17 @@ module Statsample
|
|
336
350
|
@fields.each{|f| @vectors[f].set_valid_data}
|
337
351
|
check_length
|
338
352
|
end
|
339
|
-
# Delete vector named
|
340
|
-
def delete_vector(
|
341
|
-
|
342
|
-
|
353
|
+
# Delete vector named +name+. Multiple fields accepted.
|
354
|
+
def delete_vector(*args)
|
355
|
+
if args.size==1 and args[0].is_a? Array
|
356
|
+
names=args[0]
|
357
|
+
else
|
358
|
+
names=args
|
359
|
+
end
|
360
|
+
names.each do |name|
|
361
|
+
@fields.delete(name)
|
362
|
+
@vectors.delete(name)
|
363
|
+
end
|
343
364
|
end
|
344
365
|
|
345
366
|
def add_vectors_by_split_recode(name_,join='-',sep=Statsample::SPLIT_TOKEN)
|
@@ -493,6 +514,7 @@ module Statsample
|
|
493
514
|
raise DatasetException.new(self, e)
|
494
515
|
end
|
495
516
|
end
|
517
|
+
|
496
518
|
# Returns each case as hash and index
|
497
519
|
def each_with_index # :yield: |case, i|
|
498
520
|
begin
|
@@ -530,11 +552,13 @@ module Statsample
|
|
530
552
|
}
|
531
553
|
@i=nil
|
532
554
|
end
|
533
|
-
# Set fields order. If you omit one or more vectors,
|
555
|
+
# Set fields order. If you omit one or more vectors, they are
|
556
|
+
# ordered by alphabetic order.
|
534
557
|
def fields=(f)
|
535
558
|
@fields=f
|
536
559
|
check_order
|
537
560
|
end
|
561
|
+
|
538
562
|
def check_order
|
539
563
|
if(@vectors.keys.sort!=@fields.sort)
|
540
564
|
@fields=@fields&@vectors.keys
|
@@ -640,16 +664,18 @@ module Statsample
|
|
640
664
|
def to_multiset_by_split_one_field(field)
|
641
665
|
raise ArgumentError,"Should use a correct field name" if !@fields.include? field
|
642
666
|
factors=@vectors[field].factors
|
643
|
-
ms=Multiset.new_empty_vectors(@fields,factors)
|
667
|
+
ms=Multiset.new_empty_vectors(@fields, factors)
|
644
668
|
each {|c|
|
645
669
|
ms[c[field]].add_case(c,false)
|
646
670
|
}
|
647
671
|
#puts "Ingreso a los dataset"
|
648
672
|
ms.datasets.each {|k,ds|
|
649
673
|
ds.update_valid_data
|
674
|
+
ds.name=@vectors[field].labeling(k)
|
650
675
|
ds.vectors.each{|k1,v1|
|
651
676
|
# puts "Vector #{k1}:"+v1.to_s
|
652
677
|
v1.type=@vectors[k1].type
|
678
|
+
v1.name=@vectors[k1].name
|
653
679
|
}
|
654
680
|
}
|
655
681
|
ms
|
@@ -675,7 +701,10 @@ module Statsample
|
|
675
701
|
|
676
702
|
ms.datasets.each do |k,ds|
|
677
703
|
ds.update_valid_data
|
678
|
-
ds.vectors.each{|k1,v1|
|
704
|
+
ds.vectors.each{|k1,v1|
|
705
|
+
v1.type=@vectors[k1].type
|
706
|
+
v1.name=@vectors[k1].name
|
707
|
+
}
|
679
708
|
end
|
680
709
|
ms
|
681
710
|
|
@@ -6,9 +6,11 @@ module Statsample
|
|
6
6
|
def cronbach_alpha(ods)
|
7
7
|
ds=ods.dup_only_valid
|
8
8
|
n_items=ds.fields.size
|
9
|
+
return nil if n_items<=1
|
9
10
|
s2_items=ds.vectors.inject(0) {|ac,v|
|
10
11
|
ac+v[1].variance }
|
11
12
|
total=ds.vector_sum
|
13
|
+
|
12
14
|
(n_items.quo(n_items-1)) * (1-(s2_items.quo(total.variance)))
|
13
15
|
end
|
14
16
|
# Calculate Cronbach's alpha for a given dataset
|
@@ -33,6 +35,7 @@ module Statsample
|
|
33
35
|
# from +r+ current reliability, achieved with
|
34
36
|
# +n+ items
|
35
37
|
def n_for_desired_reliability(r,r_d,n=1)
|
38
|
+
return nil if r.nil?
|
36
39
|
(r_d*(1-r)).quo(r*(1-r_d))*n
|
37
40
|
end
|
38
41
|
|
@@ -4,6 +4,17 @@ module Statsample
|
|
4
4
|
# According to Shrout & Fleiss (1979, p.422): "ICC is the correlation
|
5
5
|
# between one measurement (either a single rating or a mean of
|
6
6
|
# several ratings) on a target and another measurement obtained on that target"
|
7
|
+
# == Usage
|
8
|
+
# require 'statsample'
|
9
|
+
# size=1000
|
10
|
+
# a = size.times.map {rand(10)}.to_scale
|
11
|
+
# b = a.recode{|i|i+rand(4)-2}
|
12
|
+
# c =a.recode{|i|i+rand(4)-2}
|
13
|
+
# d = a.recode{|i|i+rand(4)-2}
|
14
|
+
# ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
|
15
|
+
# # Use :type attribute to set type to summarize
|
16
|
+
# icc=Statsample::Reliability::ICC.new(ds, :type=>:icc_1_k)
|
17
|
+
# puts icc.summary
|
7
18
|
#
|
8
19
|
# == Reference
|
9
20
|
# * Shrout,P. & Fleiss, J. (1979). Intraclass Correlation: Uses in assessing rater reliability. Psychological Bulletin, 86(2), 420-428
|
@@ -15,14 +15,26 @@ module Statsample
|
|
15
15
|
attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean, :cov_m
|
16
16
|
attr_accessor :name
|
17
17
|
def initialize(ds, opts=Hash.new)
|
18
|
-
@
|
19
|
-
|
18
|
+
@dumped=ds.fields.find_all {|f|
|
19
|
+
ds[f].variance==0
|
20
|
+
}
|
21
|
+
|
22
|
+
@ods=ds
|
23
|
+
@ds=ds.dup_only_valid(ds.fields - @dumped)
|
24
|
+
|
25
|
+
|
26
|
+
@k=@ds.fields.size
|
20
27
|
@total=@ds.vector_sum
|
28
|
+
|
29
|
+
@o_total=@dumped.size > 0 ? @ods.vector_sum : nil
|
30
|
+
|
21
31
|
@vector_mean=@ds.vector_mean
|
22
32
|
@item_mean=@vector_mean.mean
|
23
33
|
@item_sd=@vector_mean.sd
|
34
|
+
|
24
35
|
@mean=@total.mean
|
25
36
|
@median=@total.median
|
37
|
+
|
26
38
|
@skew=@total.skew
|
27
39
|
@kurtosis=@total.kurtosis
|
28
40
|
@sd = @total.sd
|
@@ -37,8 +49,8 @@ module Statsample
|
|
37
49
|
@variances_mean=@variances.mean
|
38
50
|
@covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
|
39
51
|
begin
|
40
|
-
@alpha = Statsample::Reliability.cronbach_alpha(ds)
|
41
|
-
@alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(ds)
|
52
|
+
@alpha = Statsample::Reliability.cronbach_alpha(@ds)
|
53
|
+
@alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(@ds)
|
42
54
|
rescue => e
|
43
55
|
raise DatasetException.new(@ds,e), "Error calculating alpha"
|
44
56
|
end
|
@@ -62,7 +74,7 @@ module Statsample
|
|
62
74
|
end
|
63
75
|
total.each do |f,var|
|
64
76
|
var.each do |tot,v|
|
65
|
-
out[f][tot]=out[f][tot].
|
77
|
+
out[f][tot]=out[f][tot].quo(total[f][tot])
|
66
78
|
end
|
67
79
|
end
|
68
80
|
out
|
@@ -164,7 +176,7 @@ module Statsample
|
|
164
176
|
@sif||=stats_if_deleted_intern
|
165
177
|
end
|
166
178
|
def stats_if_deleted_intern # :nodoc:
|
167
|
-
|
179
|
+
return Hash.new if @ds.fields.size==1
|
168
180
|
@ds.fields.inject({}) do |a,v|
|
169
181
|
cov_2=@cov_m.submatrix(@ds.fields-[v])
|
170
182
|
#ds2=@ds.clone
|
@@ -182,12 +194,33 @@ module Statsample
|
|
182
194
|
end
|
183
195
|
def report_building(builder) #:nodoc:
|
184
196
|
builder.section(:name=>@name) do |s|
|
197
|
+
|
198
|
+
|
199
|
+
if @dumped.size>0
|
200
|
+
s.section(:name=>"Items with variance=0") do |s1|
|
201
|
+
s.table(:name=>_("Summary for %s with all items") % @name) do |t|
|
202
|
+
t.row [_("Items"), @ods.fields.size]
|
203
|
+
t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
|
204
|
+
t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
s.table(:name=>_("Deleted items"), :header=>['item','mean']) do |t|
|
209
|
+
@dumped.each do |f|
|
210
|
+
t.row(["#{@ods[f].name}(#{f})", "%0.5f" % @ods[f].mean])
|
211
|
+
end
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
|
185
217
|
s.table(:name=>_("Summary for %s") % @name) do |t|
|
186
|
-
|
218
|
+
t.row [_("Valid Items"), @ds.fields.size]
|
219
|
+
|
187
220
|
t.row [_("Valid cases"), @valid_n]
|
188
221
|
t.row [_("Sum mean"), "%0.4f" % @mean]
|
189
222
|
t.row [_("Sum sd"), "%0.4f" % @sd ]
|
190
|
-
t.row [_("Sum variance"), "%0.4f" % @variance]
|
223
|
+
# t.row [_("Sum variance"), "%0.4f" % @variance]
|
191
224
|
t.row [_("Sum median"), @median]
|
192
225
|
t.hr
|
193
226
|
t.row [_("Item mean"), "%0.4f" % @item_mean]
|
@@ -196,26 +229,51 @@ module Statsample
|
|
196
229
|
t.row [_("Skewness"), "%0.4f" % @skew]
|
197
230
|
t.row [_("Kurtosis"), "%0.4f" % @kurtosis]
|
198
231
|
t.hr
|
199
|
-
t.row [_("Cronbach's alpha"), "%0.4f" % @alpha]
|
200
|
-
t.row [_("Standarized Cronbach's alpha"), "%0.4f" % @alpha_standarized]
|
232
|
+
t.row [_("Cronbach's alpha"), @alpha ? ("%0.4f" % @alpha) : "--"]
|
233
|
+
t.row [_("Standarized Cronbach's alpha"), @alpha_standarized ? ("%0.4f" % @alpha_standarized) : "--" ]
|
201
234
|
t.row [_("Mean rpb"), "%0.4f" % mean_rpb]
|
202
235
|
|
203
236
|
t.row [_("Variances mean"), "%g" % @variances_mean]
|
204
237
|
t.row [_("Covariances mean") , "%g" % @covariances_mean]
|
205
238
|
end
|
206
|
-
|
207
|
-
|
239
|
+
|
240
|
+
if (@alpha)
|
241
|
+
s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.fields.size))
|
242
|
+
s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.fields.size))
|
243
|
+
end
|
244
|
+
|
208
245
|
|
209
246
|
sid=stats_if_deleted
|
210
247
|
is=item_statistics
|
211
248
|
itc=item_total_correlation
|
212
249
|
|
213
|
-
|
214
250
|
s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
|
215
251
|
@ds.fields.each do |f|
|
216
|
-
|
252
|
+
row=["#{@ds[f].name}(#{f})"]
|
253
|
+
if is[f]
|
254
|
+
row+=[sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f", is[f][:sds])]
|
255
|
+
else
|
256
|
+
row+=["-","-"]
|
257
|
+
end
|
258
|
+
if sid[f]
|
259
|
+
row+= [sprintf("%0.5f",sid[f][:mean]), sprintf("%0.5f",sid[f][:variance_sample]), sprintf("%0.5f",sid[f][:sds])]
|
260
|
+
else
|
261
|
+
row+=%w{- - -}
|
262
|
+
end
|
263
|
+
if itc[f]
|
264
|
+
row+= [sprintf("%0.5f",itc[f])]
|
265
|
+
else
|
266
|
+
row+=['-']
|
267
|
+
end
|
268
|
+
if sid[f] and !sid[f][:alpha].nil?
|
269
|
+
row+=[sprintf("%0.5f",sid[f][:alpha])]
|
270
|
+
else
|
271
|
+
row+=["-"]
|
272
|
+
end
|
273
|
+
t.row row
|
217
274
|
end # end each
|
218
275
|
end # table
|
276
|
+
|
219
277
|
end # section
|
220
278
|
end # def
|
221
279
|
end # class
|
data/lib/statsample/vector.rb
CHANGED
@@ -105,12 +105,14 @@ module Statsample
|
|
105
105
|
private :check_type
|
106
106
|
|
107
107
|
# Return a vector using the standardized values for data
|
108
|
-
# with sd with denominator n-1
|
108
|
+
# with sd with denominator n-1. With variance=0, returns nil
|
109
|
+
#
|
109
110
|
|
110
111
|
def vector_standarized(use_population=false)
|
111
112
|
check_type :scale
|
112
113
|
m=mean
|
113
114
|
sd=use_population ? sdp : sds
|
115
|
+
return nil if sd==0.0
|
114
116
|
@data_with_nils.collect{|x|
|
115
117
|
if !x.nil?
|
116
118
|
(x.to_f - m).quo(sd)
|
@@ -119,6 +121,7 @@ module Statsample
|
|
119
121
|
end
|
120
122
|
}.to_vector(:scale)
|
121
123
|
end
|
124
|
+
|
122
125
|
alias_method :standarized, :vector_standarized
|
123
126
|
# Return a vector with values replaced with the percentiles
|
124
127
|
# of each values
|
@@ -623,7 +626,7 @@ module Statsample
|
|
623
626
|
s.table(:name=>_("Distribution")) do |t|
|
624
627
|
frequencies.sort.each do |k,v|
|
625
628
|
key=labels.has_key?(k) ? labels[k]:k
|
626
|
-
t.row [key,v, ("%0.2f%%" % (v.quo(n_valid)*100))]
|
629
|
+
t.row [key, v , ("%0.2f%%" % (v.quo(n_valid)*100))]
|
627
630
|
end
|
628
631
|
end
|
629
632
|
s.text _("median: %s") % median.to_s if(@type==:ordinal)
|
data/references.txt
CHANGED
@@ -2,7 +2,7 @@ References
|
|
2
2
|
* Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
3
3
|
* Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
|
4
4
|
* Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
|
5
|
-
* Cochran(1972)
|
5
|
+
* Cochran, W. (1972). Sampling Techniques [Spanish edition].
|
6
6
|
* Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
|
7
7
|
* Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann- Whitney U Statistic. <em>Journal of the Royal Statistical Society, 22</em>(2), 269-273
|
8
8
|
* Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
|
data/test/test_dataset.rb
CHANGED
@@ -340,6 +340,10 @@ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
|
|
340
340
|
expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
|
341
341
|
assert_equal(expected,ds2)
|
342
342
|
assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
|
343
|
+
expected_partial=Statsample::Dataset.new({'v1'=>[1,3,4].to_vector(:scale), 'v3'=>[9, 11,12].to_vector(:scale)})
|
344
|
+
assert_equal(expected_partial, ds1.dup_only_valid(%w{v1 v3}))
|
345
|
+
|
346
|
+
|
343
347
|
end
|
344
348
|
def test_filter
|
345
349
|
@ds['age'].type=:scale
|
@@ -105,12 +105,20 @@ class StatsampleReliabilityIccTestCase < MiniTest::Unit::TestCase
|
|
105
105
|
assert_in_delta(0.676, @icc.icc_3_k_ci[0], 0.001)
|
106
106
|
assert_in_delta(0.986, @icc.icc_3_k_ci[1], 0.001)
|
107
107
|
end
|
108
|
+
should "incorrect type raises an error" do
|
109
|
+
assert_raise(::RuntimeError) do
|
110
|
+
@icc.type=:nonexistant_type
|
111
|
+
end
|
112
|
+
end
|
108
113
|
end
|
109
114
|
|
110
115
|
begin
|
111
116
|
require 'rserve'
|
112
117
|
require 'statsample/rserve_extension'
|
113
118
|
context "McGraw and Wong" do
|
119
|
+
teardown do
|
120
|
+
@r=$reliability_icc[:r].close
|
121
|
+
end
|
114
122
|
setup do
|
115
123
|
if($reliability_icc.nil?)
|
116
124
|
size=100
|
@@ -135,15 +143,17 @@ class StatsampleReliabilityIccTestCase < MiniTest::Unit::TestCase
|
|
135
143
|
icc_a_k=icc(ds,'t','a','a'))
|
136
144
|
")
|
137
145
|
@iccs=@r.eval('iccs').to_ruby
|
138
|
-
$reliability_icc={ :icc=>@icc, :iccs=>@iccs
|
146
|
+
$reliability_icc={ :icc=>@icc, :iccs=>@iccs, :r=>@r
|
139
147
|
}
|
148
|
+
|
140
149
|
end
|
141
150
|
@icc=$reliability_icc[:icc]
|
142
151
|
@iccs=$reliability_icc[:iccs]
|
143
|
-
|
152
|
+
@r=$reliability_icc[:r]
|
153
|
+
|
144
154
|
end
|
145
155
|
[:icc_1, :icc_k, :icc_c_1, :icc_c_k, :icc_a_1, :icc_a_k].each do |t|
|
146
|
-
context "ICC Type #{t}" do
|
156
|
+
context "ICC Type #{t} " do
|
147
157
|
should "value be correct" do
|
148
158
|
@icc.type=t
|
149
159
|
@r_icc=@iccs[t.to_s]
|
@@ -174,8 +184,10 @@ class StatsampleReliabilityIccTestCase < MiniTest::Unit::TestCase
|
|
174
184
|
@icc.type=t
|
175
185
|
@r_icc=@iccs[t.to_s]
|
176
186
|
assert_in_delta(@r_icc['lbound'],@icc.lbound)
|
177
|
-
assert_in_delta(@r_icc['ubound'],@icc.ubound)
|
178
|
-
|
187
|
+
assert_in_delta(@r_icc['ubound'],@icc.ubound)
|
188
|
+
end
|
189
|
+
should "summary generated" do
|
190
|
+
assert(@icc.summary.size>0)
|
179
191
|
end
|
180
192
|
end
|
181
193
|
end
|
@@ -9,6 +9,9 @@ class StatsampleRserveExtensionTestCase < MiniTest::Unit::TestCase
|
|
9
9
|
setup do
|
10
10
|
@r=Rserve::Connection.new
|
11
11
|
end
|
12
|
+
teardown do
|
13
|
+
@r.close
|
14
|
+
end
|
12
15
|
should "return a valid rexp for numeric vector" do
|
13
16
|
a=100.times.map {|i| rand()>0.9 ? nil : i+rand() }.to_scale
|
14
17
|
rexp=a.to_REXP
|
data/test/test_vector.rb
CHANGED
@@ -30,10 +30,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
30
30
|
@correct_b=@correct_b.to_scale
|
31
31
|
|
32
32
|
@common=lambda do |av,bv|
|
33
|
-
assert_equal(@correct_a,av)
|
34
|
-
assert_equal(@correct_b,bv)
|
35
|
-
assert(!av.has_missing_data
|
36
|
-
assert(!bv.has_missing_data
|
33
|
+
assert_equal(@correct_a, av, "A no es esperado")
|
34
|
+
assert_equal(@correct_b, bv, "B no es esperado")
|
35
|
+
assert(!av.has_missing_data?, "A tiene datos faltantes")
|
36
|
+
assert(!bv.has_missing_data?, "b tiene datos faltantes")
|
37
37
|
end
|
38
38
|
end
|
39
39
|
should "return correct only_valid" do
|
@@ -285,6 +285,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
285
285
|
assert_equal(0,vs.mean)
|
286
286
|
assert_equal(1,vs.sds)
|
287
287
|
end
|
288
|
+
def test_vector_standarized_with_zero_variance
|
289
|
+
v1=100.times.map {|i| 1}.to_scale
|
290
|
+
assert(v1.standarized.nil?)
|
291
|
+
end
|
288
292
|
|
289
293
|
def test_add
|
290
294
|
a=Statsample::Vector.new([1,2,3,4,5], :scale)
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 15
|
8
|
-
-
|
9
|
-
version: 0.15.
|
8
|
+
- 1
|
9
|
+
version: 0.15.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Claudio Bustos
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
rpP0jjs0
|
36
36
|
-----END CERTIFICATE-----
|
37
37
|
|
38
|
-
date: 2010-
|
38
|
+
date: 2010-10-20 00:00:00 -03:00
|
39
39
|
default_executable:
|
40
40
|
dependencies:
|
41
41
|
- !ruby/object:Gem::Dependency
|
@@ -181,8 +181,8 @@ dependencies:
|
|
181
181
|
segments:
|
182
182
|
- 2
|
183
183
|
- 6
|
184
|
-
-
|
185
|
-
version: 2.6.
|
184
|
+
- 2
|
185
|
+
version: 2.6.2
|
186
186
|
type: :development
|
187
187
|
version_requirements: *id011
|
188
188
|
description: |-
|
@@ -371,14 +371,14 @@ post_install_message: |
|
|
371
371
|
***************************************************
|
372
372
|
Thanks for installing statsample.
|
373
373
|
|
374
|
-
On *nix, you
|
374
|
+
On *nix, you could install statsample-optimization
|
375
375
|
to retrieve gems gsl, statistics2 and a C extension
|
376
376
|
to speed some methods.
|
377
377
|
|
378
378
|
$ sudo gem install statsample-optimization
|
379
379
|
|
380
380
|
On Ubuntu, install build-essential and libgsl0-dev
|
381
|
-
using apt-get
|
381
|
+
using apt-get. Compile ruby 1.8 or 1.9 from
|
382
382
|
source code first.
|
383
383
|
|
384
384
|
$ sudo apt-get install build-essential libgsl0-dev
|
metadata.gz.sig
CHANGED
Binary file
|