statsample 0.15.0 → 0.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +7 -0
- data/Rakefile +4 -2
- data/lib/statsample.rb +1 -1
- data/lib/statsample/converters.rb +12 -9
- data/lib/statsample/dataset.rb +53 -24
- data/lib/statsample/reliability.rb +3 -0
- data/lib/statsample/reliability/icc.rb +11 -0
- data/lib/statsample/reliability/scaleanalysis.rb +72 -14
- data/lib/statsample/vector.rb +5 -2
- data/references.txt +1 -1
- data/test/test_dataset.rb +4 -0
- data/test/test_reliability_icc.rb +17 -5
- data/test/test_rserve_extension.rb +3 -0
- data/test/test_vector.rb +8 -4
- metadata +7 -7
- metadata.gz.sig +0 -0
data.tar.gz.sig
CHANGED
Binary file
|
data/History.txt
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
=== 0.15.1 / 2010-10-20
|
2
|
+
* Statsample::Excel and Statsample::PlainText add name to vectors equal to field name
|
3
|
+
* Statsample::Dataset.delete_vector accepts multiple fields.
|
4
|
+
* Statsample::Dataset.dup_only_valid allows duplication of specific fields
|
5
|
+
* ScaleAnalysis doesn't crash on one-item scales
|
6
|
+
* Updated references
|
7
|
+
|
1
8
|
=== 0.15.0 / 2010-09-07
|
2
9
|
* Added class Statsample::Reliability::ICC for calculation of Intra-class correlation (Shrout & Fleiss, 1979; McGraw & Wong, 1996). Tested with SPSS and R values.
|
3
10
|
* References: Updated and standardized references on many classes. Added grab_references.rb script, to create a list of references for the library
|
data/Rakefile
CHANGED
@@ -22,6 +22,7 @@ end
|
|
22
22
|
task :release do
|
23
23
|
system %{git push origin master}
|
24
24
|
end
|
25
|
+
|
25
26
|
desc "Update pot/po files."
|
26
27
|
task "gettext:updatepo" do
|
27
28
|
require 'gettext/tools'
|
@@ -48,14 +49,14 @@ h=Hoe.spec('statsample') do
|
|
48
49
|
***************************************************
|
49
50
|
Thanks for installing statsample.
|
50
51
|
|
51
|
-
On *nix, you
|
52
|
+
On *nix, you could install statsample-optimization
|
52
53
|
to retrieve gems gsl, statistics2 and a C extension
|
53
54
|
to speed some methods.
|
54
55
|
|
55
56
|
$ sudo gem install statsample-optimization
|
56
57
|
|
57
58
|
On Ubuntu, install build-essential and libgsl0-dev
|
58
|
-
using apt-get
|
59
|
+
using apt-get. Compile ruby 1.8 or 1.9 from
|
59
60
|
source code first.
|
60
61
|
|
61
62
|
$ sudo apt-get install build-essential libgsl0-dev
|
@@ -106,4 +107,5 @@ task :publicar_docs => [:clean, :docs] do
|
|
106
107
|
}
|
107
108
|
sh %{rsync #{h.rsync_args} #{local_dir}/ #{host}:#{remote_dir}}
|
108
109
|
end
|
110
|
+
|
109
111
|
# vim: syntax=Ruby
|
data/lib/statsample.rb
CHANGED
@@ -76,18 +76,15 @@ module Statsample
|
|
76
76
|
class SpreadsheetBase
|
77
77
|
class << self
|
78
78
|
def extract_fields(row)
|
79
|
-
=
|
80
|
-
fields=
|
81
|
-
row.to_a.collect {|c|
|
79
|
+
i=0;
|
80
|
+
fields=row.to_a.collect{|c|
|
82
81
|
if c.nil?
|
83
|
-
|
82
|
+
i+=1
|
83
|
+
"var%05d" % i
|
84
84
|
else
|
85
|
-
|
86
|
-
end
|
85
|
+
c.to_s.downcase
|
86
|
+
end
|
87
87
|
}
|
88
|
-
=end
|
89
|
-
raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all {|c| c.nil?}.size>0
|
90
|
-
fields=row.to_a.collect{|c| c.downcase}
|
91
88
|
fields.recode_repeated
|
92
89
|
end
|
93
90
|
|
@@ -132,6 +129,9 @@ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all
|
|
132
129
|
end
|
133
130
|
convert_to_scale_and_date(ds,fields)
|
134
131
|
ds.update_valid_data
|
132
|
+
fields.each {|f|
|
133
|
+
ds[f].name=f
|
134
|
+
}
|
135
135
|
ds
|
136
136
|
end
|
137
137
|
end
|
@@ -220,6 +220,9 @@ raise "Should'nt be empty headers: [#{row.to_a.join(",")}]" if row.to_a.find_all
|
|
220
220
|
end
|
221
221
|
convert_to_scale_and_date(ds, fields)
|
222
222
|
ds.update_valid_data
|
223
|
+
fields.each {|f|
|
224
|
+
ds[f].name=f
|
225
|
+
}
|
223
226
|
ds
|
224
227
|
end
|
225
228
|
end
|
data/lib/statsample/dataset.rb
CHANGED
@@ -64,7 +64,7 @@ module Statsample
|
|
64
64
|
# Ordered ids of vectors
|
65
65
|
attr_reader :fields
|
66
66
|
# Name of dataset
|
67
|
-
attr_accessor:name
|
67
|
+
attr_accessor :name
|
68
68
|
# Number of cases
|
69
69
|
attr_reader :cases
|
70
70
|
# Location of pointer on enumerations methods (like #each)
|
@@ -157,27 +157,30 @@ module Statsample
|
|
157
157
|
|
158
158
|
# Creates a copy of the given dataset, deleting all the cases with
|
159
159
|
# missing data on one of the vectors
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
160
|
+
# @param array of fields to include. If no value is given, all fields are included
|
161
|
+
def dup_only_valid(*fields_to_include)
|
162
|
+
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
163
|
+
fields_to_include=fields_to_include[0]
|
164
|
+
end
|
165
|
+
fields_to_include=@fields if fields_to_include.size==0
|
166
|
+
if fields_to_include.any? {|f| @vectors[f].has_missing_data?}
|
167
|
+
ds=Dataset.new(fields_to_include)
|
168
|
+
fields_to_include.each {|f| ds[f].type=@vectors[f].type}
|
169
|
+
each {|row|
|
170
|
+
unless fields_to_include.any? {|f| @vectors[f].has_missing_data? and !@vectors[f].is_valid? row[f]}
|
171
|
+
row_2=fields_to_include.inject({}) {|ac,v| ac[v]=row[v]; ac}
|
172
|
+
ds.add_case(row_2)
|
173
|
+
end
|
165
174
|
}
|
166
|
-
ds.update_valid_data
|
167
175
|
else
|
168
|
-
ds=dup
|
176
|
+
ds=dup fields_to_include
|
169
177
|
end
|
170
178
|
ds
|
171
179
|
end
|
172
|
-
|
173
|
-
def from_to(from,to)
|
174
|
-
raise ArgumentError, "Field #{from} should be on dataset" if !@fields.include? from
|
175
|
-
raise ArgumentError, "Field #{to} should be on dataset" if !@fields.include? to
|
176
|
-
@fields.slice(@fields.index(from)..@fields.index(to))
|
177
|
-
end
|
180
|
+
|
178
181
|
# Returns a duplicate of the Database
|
179
182
|
# If fields given, only include those vectors.
|
180
|
-
#
|
183
|
+
# @param array of fields to include. No value include all fields
|
181
184
|
def dup(*fields_to_include)
|
182
185
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
183
186
|
fields_to_include=fields_to_include[0]
|
@@ -192,10 +195,20 @@ module Statsample
|
|
192
195
|
}
|
193
196
|
Dataset.new(vectors,fields)
|
194
197
|
end
|
198
|
+
|
199
|
+
|
200
|
+
# Returns an array with the fields from first argument to last argument
|
201
|
+
def from_to(from,to)
|
202
|
+
raise ArgumentError, "Field #{from} should be on dataset" if !@fields.include? from
|
203
|
+
raise ArgumentError, "Field #{to} should be on dataset" if !@fields.include? to
|
204
|
+
@fields.slice(@fields.index(from)..@fields.index(to))
|
205
|
+
end
|
206
|
+
|
195
207
|
# Returns (when possible) a cheap copy of dataset.
|
196
208
|
# If no vector has missing values, returns original vectors.
|
197
209
|
# If missing values are present, uses Dataset.dup_only_valid
|
198
|
-
#
|
210
|
+
# @param array of fields to include. No value include all fields
|
211
|
+
|
199
212
|
def clone_only_valid(*fields_to_include)
|
200
213
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
201
214
|
fields_to_include=fields_to_include[0]
|
@@ -209,6 +222,7 @@ module Statsample
|
|
209
222
|
end
|
210
223
|
# Returns a shallow copy of Dataset.
|
211
224
|
# Object id will be distinct, but @vectors will be the same.
|
225
|
+
# @param array of fields to include. No value include all fields
|
212
226
|
def clone(*fields_to_include)
|
213
227
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
214
228
|
fields_to_include=fields_to_include[0]
|
@@ -267,7 +281,7 @@ module Statsample
|
|
267
281
|
}
|
268
282
|
Matrix.rows(rows)
|
269
283
|
end
|
270
|
-
# We have the same datasets if vectors and fields are the same
|
284
|
+
# We have the same datasets if +vectors+ and +fields+ are the same
|
271
285
|
def ==(d2)
|
272
286
|
@vectors==d2.vectors and @fields==d2.fields
|
273
287
|
end
|
@@ -336,10 +350,17 @@ module Statsample
|
|
336
350
|
@fields.each{|f| @vectors[f].set_valid_data}
|
337
351
|
check_length
|
338
352
|
end
|
339
|
-
# Delete vector named
|
340
|
-
def delete_vector(
|
341
|
-
|
342
|
-
|
353
|
+
# Delete vector named +name+. Multiple fields accepted.
|
354
|
+
def delete_vector(*args)
|
355
|
+
if args.size==1 and args[0].is_a? Array
|
356
|
+
names=args[0]
|
357
|
+
else
|
358
|
+
names=args
|
359
|
+
end
|
360
|
+
names.each do |name|
|
361
|
+
@fields.delete(name)
|
362
|
+
@vectors.delete(name)
|
363
|
+
end
|
343
364
|
end
|
344
365
|
|
345
366
|
def add_vectors_by_split_recode(name_,join='-',sep=Statsample::SPLIT_TOKEN)
|
@@ -493,6 +514,7 @@ module Statsample
|
|
493
514
|
raise DatasetException.new(self, e)
|
494
515
|
end
|
495
516
|
end
|
517
|
+
|
496
518
|
# Returns each case as hash and index
|
497
519
|
def each_with_index # :yield: |case, i|
|
498
520
|
begin
|
@@ -530,11 +552,13 @@ module Statsample
|
|
530
552
|
}
|
531
553
|
@i=nil
|
532
554
|
end
|
533
|
-
# Set fields order. If you omit one or more vectors,
|
555
|
+
# Set fields order. If you omit one or more vectors, they are
|
556
|
+
# ordered by alphabetic order.
|
534
557
|
def fields=(f)
|
535
558
|
@fields=f
|
536
559
|
check_order
|
537
560
|
end
|
561
|
+
|
538
562
|
def check_order
|
539
563
|
if(@vectors.keys.sort!=@fields.sort)
|
540
564
|
@fields=@fields&@vectors.keys
|
@@ -640,16 +664,18 @@ module Statsample
|
|
640
664
|
def to_multiset_by_split_one_field(field)
|
641
665
|
raise ArgumentError,"Should use a correct field name" if !@fields.include? field
|
642
666
|
factors=@vectors[field].factors
|
643
|
-
ms=Multiset.new_empty_vectors(@fields,factors)
|
667
|
+
ms=Multiset.new_empty_vectors(@fields, factors)
|
644
668
|
each {|c|
|
645
669
|
ms[c[field]].add_case(c,false)
|
646
670
|
}
|
647
671
|
#puts "Ingreso a los dataset"
|
648
672
|
ms.datasets.each {|k,ds|
|
649
673
|
ds.update_valid_data
|
674
|
+
ds.name=@vectors[field].labeling(k)
|
650
675
|
ds.vectors.each{|k1,v1|
|
651
676
|
# puts "Vector #{k1}:"+v1.to_s
|
652
677
|
v1.type=@vectors[k1].type
|
678
|
+
v1.name=@vectors[k1].name
|
653
679
|
}
|
654
680
|
}
|
655
681
|
ms
|
@@ -675,7 +701,10 @@ module Statsample
|
|
675
701
|
|
676
702
|
ms.datasets.each do |k,ds|
|
677
703
|
ds.update_valid_data
|
678
|
-
ds.vectors.each{|k1,v1|
|
704
|
+
ds.vectors.each{|k1,v1|
|
705
|
+
v1.type=@vectors[k1].type
|
706
|
+
v1.name=@vectors[k1].name
|
707
|
+
}
|
679
708
|
end
|
680
709
|
ms
|
681
710
|
|
@@ -6,9 +6,11 @@ module Statsample
|
|
6
6
|
def cronbach_alpha(ods)
|
7
7
|
ds=ods.dup_only_valid
|
8
8
|
n_items=ds.fields.size
|
9
|
+
return nil if n_items<=1
|
9
10
|
s2_items=ds.vectors.inject(0) {|ac,v|
|
10
11
|
ac+v[1].variance }
|
11
12
|
total=ds.vector_sum
|
13
|
+
|
12
14
|
(n_items.quo(n_items-1)) * (1-(s2_items.quo(total.variance)))
|
13
15
|
end
|
14
16
|
# Calculate Cronbach's alpha for a given dataset
|
@@ -33,6 +35,7 @@ module Statsample
|
|
33
35
|
# from +r+ current reliability, achieved with
|
34
36
|
# +n+ items
|
35
37
|
def n_for_desired_reliability(r,r_d,n=1)
|
38
|
+
return nil if r.nil?
|
36
39
|
(r_d*(1-r)).quo(r*(1-r_d))*n
|
37
40
|
end
|
38
41
|
|
@@ -4,6 +4,17 @@ module Statsample
|
|
4
4
|
# According to Shrout & Fleiss (1979, p.422): "ICC is the correlation
|
5
5
|
# between one measurement (either a single rating or a mean of
|
6
6
|
# several ratings) on a target and another measurement obtained on that target"
|
7
|
+
# == Usage
|
8
|
+
# require 'statsample'
|
9
|
+
# size=1000
|
10
|
+
# a = size.times.map {rand(10)}.to_scale
|
11
|
+
# b = a.recode{|i|i+rand(4)-2}
|
12
|
+
# c =a.recode{|i|i+rand(4)-2}
|
13
|
+
# d = a.recode{|i|i+rand(4)-2}
|
14
|
+
# ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
|
15
|
+
# # Use :type attribute to set type to summarize
|
16
|
+
# icc=Statsample::Reliability::ICC.new(ds, :type=>:icc_1_k)
|
17
|
+
# puts icc.summary
|
7
18
|
#
|
8
19
|
# == Reference
|
9
20
|
# * Shrout,P. & Fleiss, J. (1979). Intraclass Correlation: Uses in assessing rater reliability. Psychological Bulletin, 86(2), 420-428
|
@@ -15,14 +15,26 @@ module Statsample
|
|
15
15
|
attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean, :cov_m
|
16
16
|
attr_accessor :name
|
17
17
|
def initialize(ds, opts=Hash.new)
|
18
|
-
@
|
19
|
-
|
18
|
+
@dumped=ds.fields.find_all {|f|
|
19
|
+
ds[f].variance==0
|
20
|
+
}
|
21
|
+
|
22
|
+
@ods=ds
|
23
|
+
@ds=ds.dup_only_valid(ds.fields - @dumped)
|
24
|
+
|
25
|
+
|
26
|
+
@k=@ds.fields.size
|
20
27
|
@total=@ds.vector_sum
|
28
|
+
|
29
|
+
@o_total=@dumped.size > 0 ? @ods.vector_sum : nil
|
30
|
+
|
21
31
|
@vector_mean=@ds.vector_mean
|
22
32
|
@item_mean=@vector_mean.mean
|
23
33
|
@item_sd=@vector_mean.sd
|
34
|
+
|
24
35
|
@mean=@total.mean
|
25
36
|
@median=@total.median
|
37
|
+
|
26
38
|
@skew=@total.skew
|
27
39
|
@kurtosis=@total.kurtosis
|
28
40
|
@sd = @total.sd
|
@@ -37,8 +49,8 @@ module Statsample
|
|
37
49
|
@variances_mean=@variances.mean
|
38
50
|
@covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
|
39
51
|
begin
|
40
|
-
@alpha = Statsample::Reliability.cronbach_alpha(ds)
|
41
|
-
@alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(ds)
|
52
|
+
@alpha = Statsample::Reliability.cronbach_alpha(@ds)
|
53
|
+
@alpha_standarized = Statsample::Reliability.cronbach_alpha_standarized(@ds)
|
42
54
|
rescue => e
|
43
55
|
raise DatasetException.new(@ds,e), "Error calculating alpha"
|
44
56
|
end
|
@@ -62,7 +74,7 @@ module Statsample
|
|
62
74
|
end
|
63
75
|
total.each do |f,var|
|
64
76
|
var.each do |tot,v|
|
65
|
-
out[f][tot]=out[f][tot].
|
77
|
+
out[f][tot]=out[f][tot].quo(total[f][tot])
|
66
78
|
end
|
67
79
|
end
|
68
80
|
out
|
@@ -164,7 +176,7 @@ module Statsample
|
|
164
176
|
@sif||=stats_if_deleted_intern
|
165
177
|
end
|
166
178
|
def stats_if_deleted_intern # :nodoc:
|
167
|
-
|
179
|
+
return Hash.new if @ds.fields.size==1
|
168
180
|
@ds.fields.inject({}) do |a,v|
|
169
181
|
cov_2=@cov_m.submatrix(@ds.fields-[v])
|
170
182
|
#ds2=@ds.clone
|
@@ -182,12 +194,33 @@ module Statsample
|
|
182
194
|
end
|
183
195
|
def report_building(builder) #:nodoc:
|
184
196
|
builder.section(:name=>@name) do |s|
|
197
|
+
|
198
|
+
|
199
|
+
if @dumped.size>0
|
200
|
+
s.section(:name=>"Items with variance=0") do |s1|
|
201
|
+
s.table(:name=>_("Summary for %s with all items") % @name) do |t|
|
202
|
+
t.row [_("Items"), @ods.fields.size]
|
203
|
+
t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
|
204
|
+
t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
|
205
|
+
|
206
|
+
end
|
207
|
+
|
208
|
+
s.table(:name=>_("Deleted items"), :header=>['item','mean']) do |t|
|
209
|
+
@dumped.each do |f|
|
210
|
+
t.row(["#{@ods[f].name}(#{f})", "%0.5f" % @ods[f].mean])
|
211
|
+
end
|
212
|
+
end
|
213
|
+
end
|
214
|
+
end
|
215
|
+
|
216
|
+
|
185
217
|
s.table(:name=>_("Summary for %s") % @name) do |t|
|
186
|
-
|
218
|
+
t.row [_("Valid Items"), @ds.fields.size]
|
219
|
+
|
187
220
|
t.row [_("Valid cases"), @valid_n]
|
188
221
|
t.row [_("Sum mean"), "%0.4f" % @mean]
|
189
222
|
t.row [_("Sum sd"), "%0.4f" % @sd ]
|
190
|
-
t.row [_("Sum variance"), "%0.4f" % @variance]
|
223
|
+
# t.row [_("Sum variance"), "%0.4f" % @variance]
|
191
224
|
t.row [_("Sum median"), @median]
|
192
225
|
t.hr
|
193
226
|
t.row [_("Item mean"), "%0.4f" % @item_mean]
|
@@ -196,26 +229,51 @@ module Statsample
|
|
196
229
|
t.row [_("Skewness"), "%0.4f" % @skew]
|
197
230
|
t.row [_("Kurtosis"), "%0.4f" % @kurtosis]
|
198
231
|
t.hr
|
199
|
-
t.row [_("Cronbach's alpha"), "%0.4f" % @alpha]
|
200
|
-
t.row [_("Standarized Cronbach's alpha"), "%0.4f" % @alpha_standarized]
|
232
|
+
t.row [_("Cronbach's alpha"), @alpha ? ("%0.4f" % @alpha) : "--"]
|
233
|
+
t.row [_("Standarized Cronbach's alpha"), @alpha_standarized ? ("%0.4f" % @alpha_standarized) : "--" ]
|
201
234
|
t.row [_("Mean rpb"), "%0.4f" % mean_rpb]
|
202
235
|
|
203
236
|
t.row [_("Variances mean"), "%g" % @variances_mean]
|
204
237
|
t.row [_("Covariances mean") , "%g" % @covariances_mean]
|
205
238
|
end
|
206
|
-
|
207
|
-
|
239
|
+
|
240
|
+
if (@alpha)
|
241
|
+
s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.fields.size))
|
242
|
+
s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.fields.size))
|
243
|
+
end
|
244
|
+
|
208
245
|
|
209
246
|
sid=stats_if_deleted
|
210
247
|
is=item_statistics
|
211
248
|
itc=item_total_correlation
|
212
249
|
|
213
|
-
|
214
250
|
s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
|
215
251
|
@ds.fields.each do |f|
|
216
|
-
|
252
|
+
row=["#{@ds[f].name}(#{f})"]
|
253
|
+
if is[f]
|
254
|
+
row+=[sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f", is[f][:sds])]
|
255
|
+
else
|
256
|
+
row+=["-","-"]
|
257
|
+
end
|
258
|
+
if sid[f]
|
259
|
+
row+= [sprintf("%0.5f",sid[f][:mean]), sprintf("%0.5f",sid[f][:variance_sample]), sprintf("%0.5f",sid[f][:sds])]
|
260
|
+
else
|
261
|
+
row+=%w{- - -}
|
262
|
+
end
|
263
|
+
if itc[f]
|
264
|
+
row+= [sprintf("%0.5f",itc[f])]
|
265
|
+
else
|
266
|
+
row+=['-']
|
267
|
+
end
|
268
|
+
if sid[f] and !sid[f][:alpha].nil?
|
269
|
+
row+=[sprintf("%0.5f",sid[f][:alpha])]
|
270
|
+
else
|
271
|
+
row+=["-"]
|
272
|
+
end
|
273
|
+
t.row row
|
217
274
|
end # end each
|
218
275
|
end # table
|
276
|
+
|
219
277
|
end # section
|
220
278
|
end # def
|
221
279
|
end # class
|
data/lib/statsample/vector.rb
CHANGED
@@ -105,12 +105,14 @@ module Statsample
|
|
105
105
|
private :check_type
|
106
106
|
|
107
107
|
# Return a vector using the standarized values for data
|
108
|
-
# with sd with denominator n-1
|
108
|
+
# with sd with denominator n-1. With variance=0, returns nil
|
109
|
+
#
|
109
110
|
|
110
111
|
def vector_standarized(use_population=false)
|
111
112
|
check_type :scale
|
112
113
|
m=mean
|
113
114
|
sd=use_population ? sdp : sds
|
115
|
+
return nil if sd==0.0
|
114
116
|
@data_with_nils.collect{|x|
|
115
117
|
if !x.nil?
|
116
118
|
(x.to_f - m).quo(sd)
|
@@ -119,6 +121,7 @@ module Statsample
|
|
119
121
|
end
|
120
122
|
}.to_vector(:scale)
|
121
123
|
end
|
124
|
+
|
122
125
|
alias_method :standarized, :vector_standarized
|
123
126
|
# Return a vector with values replaced with the percentiles
|
124
127
|
# of each values
|
@@ -623,7 +626,7 @@ module Statsample
|
|
623
626
|
s.table(:name=>_("Distribution")) do |t|
|
624
627
|
frequencies.sort.each do |k,v|
|
625
628
|
key=labels.has_key?(k) ? labels[k]:k
|
626
|
-
t.row [key,v, ("%0.2f%%" % (v.quo(n_valid)*100))]
|
629
|
+
t.row [key, v , ("%0.2f%%" % (v.quo(n_valid)*100))]
|
627
630
|
end
|
628
631
|
end
|
629
632
|
s.text _("median: %s") % median.to_s if(@type==:ordinal)
|
data/references.txt
CHANGED
@@ -2,7 +2,7 @@ References
|
|
2
2
|
* Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. <em>Psychological Methods, 8</em>(2), 129-148.
|
3
3
|
* Azen, R. & Budescu, D.V. (2006). Comparing predictors in Multivariate Regression Models: An extension of Dominance Analysis. <em>Journal of Educational and Behavioral Statistics, 31</em>(2), 157-180.
|
4
4
|
* Budescu, D. V. (1993). Dominance analysis: a new approach to the problem of relative importance of predictors in multiple regression. <em>Psychological Bulletin, 114</em>, 542-551.
|
5
|
-
* Cochran(1972)
|
5
|
+
* Cochran, W.(1972). Sampling Techniques [spanish edition].
|
6
6
|
* Cohen et al. (2003). Applied Multiple Regression / Correlation Analysis for the Behavioral Sciences
|
7
7
|
* Dinneen, L., & Blakesley, B. (1973). Algorithm AS 62: A Generator for the Sampling Distribution of the Mann- Whitney U Statistic. <em>Journal of the Royal Statistical Society, 22</em>(2), 269-273
|
8
8
|
* Dziuban, C., & Shirkey E. (1974). When is a correlation matrix appropriate for factor analysis? Some decision rules. Psychological Bulletin, 81(6), 358-361.
|
data/test/test_dataset.rb
CHANGED
@@ -340,6 +340,10 @@ class StatsampleDatasetTestCase < MiniTest::Unit::TestCase
|
|
340
340
|
expected=Statsample::Dataset.new({'v1'=>[1,4].to_vector(:scale), 'v2'=> [5,8].to_vector(:scale), 'v3'=>[9, 12].to_vector(:scale)})
|
341
341
|
assert_equal(expected,ds2)
|
342
342
|
assert_equal(expected.vectors.values,Statsample::only_valid(v1,v2,v3))
|
343
|
+
expected_partial=Statsample::Dataset.new({'v1'=>[1,3,4].to_vector(:scale), 'v3'=>[9, 11,12].to_vector(:scale)})
|
344
|
+
assert_equal(expected_partial, ds1.dup_only_valid(%w{v1 v3}))
|
345
|
+
|
346
|
+
|
343
347
|
end
|
344
348
|
def test_filter
|
345
349
|
@ds['age'].type=:scale
|
@@ -105,12 +105,20 @@ class StatsampleReliabilityIccTestCase < MiniTest::Unit::TestCase
|
|
105
105
|
assert_in_delta(0.676, @icc.icc_3_k_ci[0], 0.001)
|
106
106
|
assert_in_delta(0.986, @icc.icc_3_k_ci[1], 0.001)
|
107
107
|
end
|
108
|
+
should "incorrect type raises an error" do
|
109
|
+
assert_raise(::RuntimeError) do
|
110
|
+
@icc.type=:nonexistant_type
|
111
|
+
end
|
112
|
+
end
|
108
113
|
end
|
109
114
|
|
110
115
|
begin
|
111
116
|
require 'rserve'
|
112
117
|
require 'statsample/rserve_extension'
|
113
118
|
context "McGraw and Wong" do
|
119
|
+
teardown do
|
120
|
+
@r=$reliability_icc[:r].close
|
121
|
+
end
|
114
122
|
setup do
|
115
123
|
if($reliability_icc.nil?)
|
116
124
|
size=100
|
@@ -135,15 +143,17 @@ class StatsampleReliabilityIccTestCase < MiniTest::Unit::TestCase
|
|
135
143
|
icc_a_k=icc(ds,'t','a','a'))
|
136
144
|
")
|
137
145
|
@iccs=@r.eval('iccs').to_ruby
|
138
|
-
$reliability_icc={ :icc=>@icc, :iccs=>@iccs
|
146
|
+
$reliability_icc={ :icc=>@icc, :iccs=>@iccs, :r=>@r
|
139
147
|
}
|
148
|
+
|
140
149
|
end
|
141
150
|
@icc=$reliability_icc[:icc]
|
142
151
|
@iccs=$reliability_icc[:iccs]
|
143
|
-
|
152
|
+
@r=$reliability_icc[:r]
|
153
|
+
|
144
154
|
end
|
145
155
|
[:icc_1, :icc_k, :icc_c_1, :icc_c_k, :icc_a_1, :icc_a_k].each do |t|
|
146
|
-
context "ICC Type #{t}" do
|
156
|
+
context "ICC Type #{t} " do
|
147
157
|
should "value be correct" do
|
148
158
|
@icc.type=t
|
149
159
|
@r_icc=@iccs[t.to_s]
|
@@ -174,8 +184,10 @@ class StatsampleReliabilityIccTestCase < MiniTest::Unit::TestCase
|
|
174
184
|
@icc.type=t
|
175
185
|
@r_icc=@iccs[t.to_s]
|
176
186
|
assert_in_delta(@r_icc['lbound'],@icc.lbound)
|
177
|
-
assert_in_delta(@r_icc['ubound'],@icc.ubound)
|
178
|
-
|
187
|
+
assert_in_delta(@r_icc['ubound'],@icc.ubound)
|
188
|
+
end
|
189
|
+
should "summary generated" do
|
190
|
+
assert(@icc.summary.size>0)
|
179
191
|
end
|
180
192
|
end
|
181
193
|
end
|
@@ -9,6 +9,9 @@ class StatsampleRserveExtensionTestCase < MiniTest::Unit::TestCase
|
|
9
9
|
setup do
|
10
10
|
@r=Rserve::Connection.new
|
11
11
|
end
|
12
|
+
teardown do
|
13
|
+
@r.close
|
14
|
+
end
|
12
15
|
should "return a valid rexp for numeric vector" do
|
13
16
|
a=100.times.map {|i| rand()>0.9 ? nil : i+rand() }.to_scale
|
14
17
|
rexp=a.to_REXP
|
data/test/test_vector.rb
CHANGED
@@ -30,10 +30,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
30
30
|
@correct_b=@correct_b.to_scale
|
31
31
|
|
32
32
|
@common=lambda do |av,bv|
|
33
|
-
assert_equal(@correct_a,av)
|
34
|
-
assert_equal(@correct_b,bv)
|
35
|
-
assert(!av.has_missing_data
|
36
|
-
assert(!bv.has_missing_data
|
33
|
+
assert_equal(@correct_a, av, "A no es esperado")
|
34
|
+
assert_equal(@correct_b, bv, "B no es esperado")
|
35
|
+
assert(!av.has_missing_data?, "A tiene datos faltantes")
|
36
|
+
assert(!bv.has_missing_data?, "b tiene datos faltantes")
|
37
37
|
end
|
38
38
|
end
|
39
39
|
should "return correct only_valid" do
|
@@ -285,6 +285,10 @@ class StatsampleTestVector < MiniTest::Unit::TestCase
|
|
285
285
|
assert_equal(0,vs.mean)
|
286
286
|
assert_equal(1,vs.sds)
|
287
287
|
end
|
288
|
+
def test_vector_standarized_with_zero_variance
|
289
|
+
v1=100.times.map {|i| 1}.to_scale
|
290
|
+
assert(v1.standarized.nil?)
|
291
|
+
end
|
288
292
|
|
289
293
|
def test_add
|
290
294
|
a=Statsample::Vector.new([1,2,3,4,5], :scale)
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 15
|
8
|
-
-
|
9
|
-
version: 0.15.
|
8
|
+
- 1
|
9
|
+
version: 0.15.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Claudio Bustos
|
@@ -35,7 +35,7 @@ cert_chain:
|
|
35
35
|
rpP0jjs0
|
36
36
|
-----END CERTIFICATE-----
|
37
37
|
|
38
|
-
date: 2010-
|
38
|
+
date: 2010-10-20 00:00:00 -03:00
|
39
39
|
default_executable:
|
40
40
|
dependencies:
|
41
41
|
- !ruby/object:Gem::Dependency
|
@@ -181,8 +181,8 @@ dependencies:
|
|
181
181
|
segments:
|
182
182
|
- 2
|
183
183
|
- 6
|
184
|
-
-
|
185
|
-
version: 2.6.
|
184
|
+
- 2
|
185
|
+
version: 2.6.2
|
186
186
|
type: :development
|
187
187
|
version_requirements: *id011
|
188
188
|
description: |-
|
@@ -371,14 +371,14 @@ post_install_message: |
|
|
371
371
|
***************************************************
|
372
372
|
Thanks for installing statsample.
|
373
373
|
|
374
|
-
On *nix, you
|
374
|
+
On *nix, you could install statsample-optimization
|
375
375
|
to retrieve gems gsl, statistics2 and a C extension
|
376
376
|
to speed some methods.
|
377
377
|
|
378
378
|
$ sudo gem install statsample-optimization
|
379
379
|
|
380
380
|
On Ubuntu, install build-essential and libgsl0-dev
|
381
|
-
using apt-get
|
381
|
+
using apt-get. Compile ruby 1.8 or 1.9 from
|
382
382
|
source code first.
|
383
383
|
|
384
384
|
$ sudo apt-get install build-essential libgsl0-dev
|
metadata.gz.sig
CHANGED
Binary file
|